mach64 driver from git://anongit.freedesktop.org/git/mesa/drm
head: c99566fb810c9d8cae5e9cd39d1772b55e2f514c

---
 drivers/gpu/drm/Kconfig               |   11
 drivers/gpu/drm/Makefile              |    1
 drivers/gpu/drm/mach64/Makefile       |    8
 drivers/gpu/drm/mach64/mach64_dma.c   | 1778 ++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/mach64/mach64_drm.h   |  256 ++++
 drivers/gpu/drm/mach64/mach64_drv.c   |  105 ++
 drivers/gpu/drm/mach64/mach64_drv.h   |  859 ++++++++++++++++
 drivers/gpu/drm/mach64/mach64_irq.c   |  159 +++
 drivers/gpu/drm/mach64/mach64_state.c |  910 +++++++++++++++++
 9 files changed, 4087 insertions(+)

--- /dev/null
+++ b/drivers/gpu/drm/mach64/Makefile
@@ -0,0 +1,8 @@
+#
+# Makefile for the mach64 DRM driver, which provides support for the
+# Direct Rendering Infrastructure (DRI) in XFree86 4.1.0 and higher.
+
+ccflags-y = -Iinclude/drm
+mach64-y := mach64_drv.o mach64_dma.o mach64_irq.o mach64_state.o
+
+obj-$(CONFIG_DRM_MACH64) += mach64.o
--- /dev/null
+++ b/drivers/gpu/drm/mach64/mach64_dma.c
@@ -0,0 +1,1778 @@
+/* mach64_dma.c -- DMA support for mach64 (Rage Pro) driver -*- linux-c -*- */
+/**
+ * \file mach64_dma.c
+ * DMA support for mach64 (Rage Pro) driver
+ *
+ * \author Gareth Hughes <gareth@valinux.com>
+ * \author Frank C. Earl <fearl@airmail.net>
+ * \author Leif Delgass <ldelgass@retinalburn.net>
+ * \author José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+/*
+ * Copyright 2000 Gareth Hughes
+ * Copyright 2002 Frank C. Earl
+ * Copyright 2002-2003 Leif Delgass
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT OWNER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "drmP.h"
+#include "drm.h"
+#include "mach64_drm.h"
+#include "mach64_drv.h"
+
+/*******************************************************************/
+/** \name Engine, FIFO control */
+/*@{*/
+
+/**
+ * Wait until the command FIFO has enough free entries.
+ *
+ * \note Most writes to Mach64 registers are automatically routed through the
+ * command FIFO, which is 16 entries deep. Before writing to any draw engine
+ * register, call this function to ensure that enough FIFO entries are
+ * available.  Failure to do so may cause the engine to lock.
+ *
+ * \param dev_priv pointer to device private data structure.
+ * \param entries number of free entries in the FIFO to wait for.
+ *
+ * \returns zero on success, or -EBUSY if the timeout (specified by
+ * drm_mach64_private::usec_timeout) occurs.
+ */
+int mach64_do_wait_for_fifo(drm_mach64_private_t *dev_priv, int entries)
+{
+	int slots = 0, usec = 0;
+
+	while (usec < dev_priv->usec_timeout) {
+		slots = MACH64_READ(MACH64_FIFO_STAT) & MACH64_FIFO_SLOT_MASK;
+		if (slots <= (0x8000 >> entries))
+			return 0;
+		DRM_UDELAY(1);
+		usec++;
+	}
+	DRM_INFO("failed! slots=%d entries=%d\n", slots, entries);
+	return -EBUSY;
+}
+
+/**
+ * Wait for the draw engine to be idle: wait until all 16 FIFO entries are
+ * free, then poll GUI_STAT until GUI_ACTIVE clears.  Returns zero on success,
+ * or -EBUSY if either wait times out.
+ */
+int mach64_do_wait_for_idle(drm_mach64_private_t *dev_priv)
+{
+	int usec;
+	int err = mach64_do_wait_for_fifo(dev_priv, 16);
+
+	if (err < 0)
+		return err;
+	for (usec = 0; usec < dev_priv->usec_timeout; usec++) {
+		if ((MACH64_READ(MACH64_GUI_STAT) & MACH64_GUI_ACTIVE) == 0)
+			return 0;
+		DRM_UDELAY(1);
+	}
+	DRM_INFO("failed! GUI_STAT=0x%08x\n", MACH64_READ(MACH64_GUI_STAT));
+	mach64_dump_ring_info(dev_priv);
+	return -EBUSY;
+}
+
+/**
+ * Wait for free space in the ring buffer.
+ *
+ * The Mach64 bus master can be configured to act as a virtual FIFO, using a
+ * circular buffer (commonly called a "ring buffer" in other drivers) with
+ * pointers to engine commands. This allows the CPU to do other things while
+ * the graphics engine is busy, i.e., DMA mode.
+ *
+ * This function should be called before writing new ring entries.
+ *
+ * \param dev_priv pointer to device private data structure.
+ * \param n free space in the ring buffer to wait for; BEGIN_RING passes this
+ * in bytes.
+ *
+ * \returns zero on success, or -EBUSY if the timeout (specified by
+ * drm_mach64_private_t::usec_timeout) occurs.
+ *
+ * \sa mach64_dump_ring_info()
+ */
+int mach64_wait_ring(drm_mach64_private_t *dev_priv, int n)
+{
+	drm_mach64_descriptor_ring_t *ring = &dev_priv->ring;
+	int usecs = 0;
+
+	while (usecs < dev_priv->usec_timeout) {
+		mach64_update_ring_snapshot(dev_priv);
+		if (ring->space >= n) {
+			if (usecs != 0)
+				DRM_DEBUG("%d usecs\n", usecs);
+			return 0;
+		}
+		DRM_UDELAY(1);
+		usecs++;
+	}
+	/* FIXME: This is being ignored... */
+	DRM_ERROR("failed!\n");
+	mach64_dump_ring_info(dev_priv);
+	return -EBUSY;
+}
+
+/**
+ * Wait until all DMA requests have been processed.  The timeout count
+ * restarts whenever the ring head advances.
+ *
+ * \sa mach64_wait_ring()
+ */
+static int mach64_ring_idle(drm_mach64_private_t *dev_priv)
+{
+	drm_mach64_descriptor_ring_t *ring = &dev_priv->ring;
+	u32 last_head = ring->head;
+	int stalled = 0;
+
+	while (stalled < dev_priv->usec_timeout) {
+		mach64_update_ring_snapshot(dev_priv);
+		if (ring->head == ring->tail &&
+		    !(MACH64_READ(MACH64_GUI_STAT) & MACH64_GUI_ACTIVE)) {
+			if (stalled > 0)
+				DRM_DEBUG("%d usecs\n", stalled);
+			return 0;
+		}
+		if (ring->head != last_head) {
+			/* progress was made: restart the timeout */
+			last_head = ring->head;
+			stalled = 0;
+		} else {
+			stalled++;
+		}
+		DRM_UDELAY(1);
+	}
+
+	DRM_INFO("failed! GUI_STAT=0x%08x\n", MACH64_READ(MACH64_GUI_STAT));
+	mach64_dump_ring_info(dev_priv);
+	return -EBUSY;
+}
+
+/**
+ * Reset the ring buffer descriptors and mark the ring as not running.
+ *
+ * \sa mach64_do_engine_reset()
+ */
+static void mach64_ring_reset(drm_mach64_private_t *dev_priv)
+{
+	drm_mach64_descriptor_ring_t *ring = &dev_priv->ring;
+
+	mach64_do_release_used_buffers(dev_priv);
+	ring->head_addr = ring->start_addr;
+	ring->head = ring->tail = 0;
+	ring->space = ring->size;
+
+	MACH64_WRITE(MACH64_BM_GUI_TABLE_CMD,
+		     ring->head_addr | MACH64_CIRCULAR_BUF_SIZE_16KB);
+
+	dev_priv->ring_running = 0;
+}
+
+/**
+ * Ensure that all the queued commands will be processed.
+ */
+int mach64_do_dma_flush(drm_mach64_private_t *dev_priv)
+{
+	/* FIXME: It's not necessary to wait for idle when flushing
+	 * we just need to ensure the ring will be completely processed
+	 * in finite time without another ioctl
+	 */
+	return mach64_ring_idle(dev_priv);
+}
+
+/**
+ * Stop all DMA activity: wait for the ring to drain, stop it and release the
+ * used buffers.  Returns zero, or the mach64_ring_idle() error on timeout.
+ */
+int mach64_do_dma_idle(drm_mach64_private_t *dev_priv)
+{
+	/* wait for completion */
+	const int err = mach64_ring_idle(dev_priv);
+
+	if (err >= 0) {
+		mach64_ring_stop(dev_priv);
+		/* clean up after pass */
+		mach64_do_release_used_buffers(dev_priv);
+		return 0;
+	}
+
+	DRM_ERROR("failed BM_GUI_TABLE=0x%08x tail: %u\n",
+		  MACH64_READ(MACH64_BM_GUI_TABLE),
+		  dev_priv->ring.tail);
+	return err;
+}
+
+/**
+ * Reset the engine and descriptor ring, stopping any running DMA.  Returns 0.
+ */
+int mach64_do_engine_reset(drm_mach64_private_t *dev_priv)
+{
+	u32 tmp;
+
+	DRM_DEBUG("\n");
+
+	/* Kill off any outstanding DMA transfers.
+	 */
+	tmp = MACH64_READ(MACH64_BUS_CNTL);
+	MACH64_WRITE(MACH64_BUS_CNTL, tmp | MACH64_BUS_MASTER_DIS);
+
+	/* Reset the GUI engine (high to low transition).
+	 */
+	tmp = MACH64_READ(MACH64_GEN_TEST_CNTL);
+	MACH64_WRITE(MACH64_GEN_TEST_CNTL, tmp & ~MACH64_GUI_ENGINE_ENABLE);
+	/* Enable the GUI engine
+	 */
+	tmp = MACH64_READ(MACH64_GEN_TEST_CNTL);
+	MACH64_WRITE(MACH64_GEN_TEST_CNTL, tmp | MACH64_GUI_ENGINE_ENABLE);
+
+	/* ensure engine is not locked up by clearing any FIFO or HOST errors
+	 */
+	tmp = MACH64_READ(MACH64_BUS_CNTL);
+	MACH64_WRITE(MACH64_BUS_CNTL, tmp | 0x00a00000);
+
+	/* Once GUI engine is restored, disable bus mastering */
+	MACH64_WRITE(MACH64_SRC_CNTL, 0);
+
+	/* Reset descriptor ring */
+	mach64_ring_reset(dev_priv);
+
+	return 0;
+}
+
+/*@}*/
+
+
+/*******************************************************************/
+/** \name Debugging output */
+/*@{*/
+
+/**
+ * Dump the values of the engine registers to the log via DRM_INFO.
+ */
+void mach64_dump_engine_info(drm_mach64_private_t *dev_priv)
+{
+	DRM_INFO("\n");
+	if (!dev_priv->is_pci) {
+		DRM_INFO("           AGP_BASE = 0x%08x\n",
+			 MACH64_READ(MACH64_AGP_BASE));
+		DRM_INFO("           AGP_CNTL = 0x%08x\n",
+			 MACH64_READ(MACH64_AGP_CNTL));
+	}
+	DRM_INFO("     ALPHA_TST_CNTL = 0x%08x\n",
+		 MACH64_READ(MACH64_ALPHA_TST_CNTL));
+	DRM_INFO("\n");
+	DRM_INFO("         BM_COMMAND = 0x%08x\n",
+		 MACH64_READ(MACH64_BM_COMMAND));
+	DRM_INFO("BM_FRAME_BUF_OFFSET = 0x%08x\n",
+		 MACH64_READ(MACH64_BM_FRAME_BUF_OFFSET));
+	DRM_INFO("       BM_GUI_TABLE = 0x%08x\n",
+		 MACH64_READ(MACH64_BM_GUI_TABLE));
+	DRM_INFO("          BM_STATUS = 0x%08x\n",
+		 MACH64_READ(MACH64_BM_STATUS));
+	DRM_INFO(" BM_SYSTEM_MEM_ADDR = 0x%08x\n",
+		 MACH64_READ(MACH64_BM_SYSTEM_MEM_ADDR));
+	DRM_INFO("    BM_SYSTEM_TABLE = 0x%08x\n",
+		 MACH64_READ(MACH64_BM_SYSTEM_TABLE));
+	DRM_INFO("           BUS_CNTL = 0x%08x\n",
+		 MACH64_READ(MACH64_BUS_CNTL));
+	DRM_INFO("\n");
+	/* DRM_INFO( "         CLOCK_CNTL = 0x%08x\n", MACH64_READ( MACH64_CLOCK_CNTL ) ); */
+	DRM_INFO("        CLR_CMP_CLR = 0x%08x\n",
+		 MACH64_READ(MACH64_CLR_CMP_CLR));
+	DRM_INFO("       CLR_CMP_CNTL = 0x%08x\n",
+		 MACH64_READ(MACH64_CLR_CMP_CNTL));
+	/* DRM_INFO( "        CLR_CMP_MSK = 0x%08x\n", MACH64_READ( MACH64_CLR_CMP_MSK ) ); */
+	DRM_INFO("     CONFIG_CHIP_ID = 0x%08x\n",
+		 MACH64_READ(MACH64_CONFIG_CHIP_ID));
+	DRM_INFO("        CONFIG_CNTL = 0x%08x\n",
+		 MACH64_READ(MACH64_CONFIG_CNTL));
+	DRM_INFO("       CONFIG_STAT0 = 0x%08x\n",
+		 MACH64_READ(MACH64_CONFIG_STAT0));
+	DRM_INFO("       CONFIG_STAT1 = 0x%08x\n",
+		 MACH64_READ(MACH64_CONFIG_STAT1));
+	DRM_INFO("       CONFIG_STAT2 = 0x%08x\n",
+		 MACH64_READ(MACH64_CONFIG_STAT2));
+	DRM_INFO("            CRC_SIG = 0x%08x\n", MACH64_READ(MACH64_CRC_SIG));
+	DRM_INFO("  CUSTOM_MACRO_CNTL = 0x%08x\n",
+		 MACH64_READ(MACH64_CUSTOM_MACRO_CNTL));
+	DRM_INFO("\n");
+	/* DRM_INFO( "           DAC_CNTL = 0x%08x\n", MACH64_READ( MACH64_DAC_CNTL ) ); */
+	/* DRM_INFO( "           DAC_REGS = 0x%08x\n", MACH64_READ( MACH64_DAC_REGS ) ); */
+	DRM_INFO("        DP_BKGD_CLR = 0x%08x\n",
+		 MACH64_READ(MACH64_DP_BKGD_CLR));
+	DRM_INFO("        DP_FRGD_CLR = 0x%08x\n",
+		 MACH64_READ(MACH64_DP_FRGD_CLR));
+	DRM_INFO("             DP_MIX = 0x%08x\n", MACH64_READ(MACH64_DP_MIX));
+	DRM_INFO("       DP_PIX_WIDTH = 0x%08x\n",
+		 MACH64_READ(MACH64_DP_PIX_WIDTH));
+	DRM_INFO("             DP_SRC = 0x%08x\n", MACH64_READ(MACH64_DP_SRC));
+	DRM_INFO("      DP_WRITE_MASK = 0x%08x\n",
+		 MACH64_READ(MACH64_DP_WRITE_MASK));
+	DRM_INFO("         DSP_CONFIG = 0x%08x\n",
+		 MACH64_READ(MACH64_DSP_CONFIG));
+	DRM_INFO("         DSP_ON_OFF = 0x%08x\n",
+		 MACH64_READ(MACH64_DSP_ON_OFF));
+	DRM_INFO("           DST_CNTL = 0x%08x\n",
+		 MACH64_READ(MACH64_DST_CNTL));
+	DRM_INFO("      DST_OFF_PITCH = 0x%08x\n",
+		 MACH64_READ(MACH64_DST_OFF_PITCH));
+	DRM_INFO("\n");
+	/* DRM_INFO( "       EXT_DAC_REGS = 0x%08x\n", MACH64_READ( MACH64_EXT_DAC_REGS ) ); */
+	DRM_INFO("       EXT_MEM_CNTL = 0x%08x\n",
+		 MACH64_READ(MACH64_EXT_MEM_CNTL));
+	DRM_INFO("\n");
+	DRM_INFO("          FIFO_STAT = 0x%08x\n",
+		 MACH64_READ(MACH64_FIFO_STAT));
+	DRM_INFO("\n");
+	DRM_INFO("      GEN_TEST_CNTL = 0x%08x\n",
+		 MACH64_READ(MACH64_GEN_TEST_CNTL));
+	/* DRM_INFO( "              GP_IO = 0x%08x\n", MACH64_READ( MACH64_GP_IO ) ); */
+	DRM_INFO("   GUI_CMDFIFO_DATA = 0x%08x\n",
+		 MACH64_READ(MACH64_GUI_CMDFIFO_DATA));
+	DRM_INFO("  GUI_CMDFIFO_DEBUG = 0x%08x\n",
+		 MACH64_READ(MACH64_GUI_CMDFIFO_DEBUG));
+	DRM_INFO("           GUI_CNTL = 0x%08x\n",
+		 MACH64_READ(MACH64_GUI_CNTL));
+	DRM_INFO("           GUI_STAT = 0x%08x\n",
+		 MACH64_READ(MACH64_GUI_STAT));
+	DRM_INFO("      GUI_TRAJ_CNTL = 0x%08x\n",
+		 MACH64_READ(MACH64_GUI_TRAJ_CNTL));
+	DRM_INFO("\n");
+	DRM_INFO("          HOST_CNTL = 0x%08x\n",
+		 MACH64_READ(MACH64_HOST_CNTL));
+	DRM_INFO("           HW_DEBUG = 0x%08x\n",
+		 MACH64_READ(MACH64_HW_DEBUG));
+	DRM_INFO("\n");
+	DRM_INFO("    MEM_ADDR_CONFIG = 0x%08x\n",
+		 MACH64_READ(MACH64_MEM_ADDR_CONFIG));
+	DRM_INFO("       MEM_BUF_CNTL = 0x%08x\n",
+		 MACH64_READ(MACH64_MEM_BUF_CNTL));
+	DRM_INFO("\n");
+	DRM_INFO("           PAT_REG0 = 0x%08x\n",
+		 MACH64_READ(MACH64_PAT_REG0));
+	DRM_INFO("           PAT_REG1 = 0x%08x\n",
+		 MACH64_READ(MACH64_PAT_REG1));
+	DRM_INFO("\n");
+	DRM_INFO("            SC_LEFT = 0x%08x\n", MACH64_READ(MACH64_SC_LEFT));
+	DRM_INFO("           SC_RIGHT = 0x%08x\n",
+		 MACH64_READ(MACH64_SC_RIGHT));
+	DRM_INFO("             SC_TOP = 0x%08x\n", MACH64_READ(MACH64_SC_TOP));
+	DRM_INFO("          SC_BOTTOM = 0x%08x\n",
+		 MACH64_READ(MACH64_SC_BOTTOM));
+	DRM_INFO("\n");
+	DRM_INFO("      SCALE_3D_CNTL = 0x%08x\n",
+		 MACH64_READ(MACH64_SCALE_3D_CNTL));
+	DRM_INFO("       SCRATCH_REG0 = 0x%08x\n",
+		 MACH64_READ(MACH64_SCRATCH_REG0));
+	DRM_INFO("       SCRATCH_REG1 = 0x%08x\n",
+		 MACH64_READ(MACH64_SCRATCH_REG1));
+	DRM_INFO("         SETUP_CNTL = 0x%08x\n",
+		 MACH64_READ(MACH64_SETUP_CNTL));
+	DRM_INFO("           SRC_CNTL = 0x%08x\n",
+		 MACH64_READ(MACH64_SRC_CNTL));
+	DRM_INFO("\n");
+	DRM_INFO("           TEX_CNTL = 0x%08x\n",
+		 MACH64_READ(MACH64_TEX_CNTL));
+	DRM_INFO("     TEX_SIZE_PITCH = 0x%08x\n",
+		 MACH64_READ(MACH64_TEX_SIZE_PITCH));
+	DRM_INFO("       TIMER_CONFIG = 0x%08x\n",
+		 MACH64_READ(MACH64_TIMER_CONFIG));
+	DRM_INFO("\n");
+	DRM_INFO("             Z_CNTL = 0x%08x\n", MACH64_READ(MACH64_Z_CNTL));
+	DRM_INFO("        Z_OFF_PITCH = 0x%08x\n",
+		 MACH64_READ(MACH64_Z_OFF_PITCH));
+	DRM_INFO("\n");
+}
+
+#define MACH64_DUMP_CONTEXT	3	/* entries of context dumped per mark */
+
+/**
+ * Used by mach64_dump_ring_info() to dump the buffer pointed to by the ring
+ * head; prints only its start, its end and the area near BM_SYSTEM_MEM_ADDR.
+ */
+static void mach64_dump_buf_info(drm_mach64_private_t *dev_priv,
+				 struct drm_buf *buf)
+{
+	u32 addr = GETBUFADDR(buf);
+	u32 used = buf->used >> 2;	/* bytes -> dwords */
+	u32 sys_addr = MACH64_READ(MACH64_BM_SYSTEM_MEM_ADDR);
+	u32 *p = GETBUFPTR(buf);
+	int skipped = 0;
+
+	DRM_INFO("buffer contents:\n");
+
+	while (used) {
+		u32 reg, count;
+
+		reg = le32_to_cpu(*p++);
+		if (addr <= GETBUFADDR(buf) + MACH64_DUMP_CONTEXT * 4 ||
+		    (addr >= sys_addr - MACH64_DUMP_CONTEXT * 4 &&
+		     addr <= sys_addr + MACH64_DUMP_CONTEXT * 4) ||
+		    addr >=
+		    GETBUFADDR(buf) + buf->used - MACH64_DUMP_CONTEXT * 4) {
+			DRM_INFO("%08x:  0x%08x\n", addr, reg);
+		}
+		addr += 4;
+		used--;
+
+		count = (reg >> 16) + 1;	/* high half holds count - 1 */
+		reg = reg & 0xffff;
+		reg = MMSELECT(reg);
+		while (count && used) {
+			if (addr <= GETBUFADDR(buf) + MACH64_DUMP_CONTEXT * 4 ||
+			    (addr >= sys_addr - MACH64_DUMP_CONTEXT * 4 &&
+			     addr <= sys_addr + MACH64_DUMP_CONTEXT * 4) ||
+			    addr >=
+			    GETBUFADDR(buf) + buf->used -
+			    MACH64_DUMP_CONTEXT * 4) {
+				DRM_INFO("%08x:    0x%04x = 0x%08x\n", addr,
+					 reg, le32_to_cpu(*p));
+				skipped = 0;
+			} else {
+				if (!skipped) {
+					DRM_INFO("  ...\n");
+					skipped = 1;
+				}
+			}
+			p++;
+			addr += 4;
+			used--;
+
+			reg += 4;
+			count--;
+		}
+	}
+
+	DRM_INFO("\n");
+}
+
+/**
+ * Dump the ring state and contents, including the contents of the pending
+ * buffer that contains the address in the descriptor at the ring head.
+ */
+void mach64_dump_ring_info(drm_mach64_private_t *dev_priv)
+{
+	drm_mach64_descriptor_ring_t *ring = &dev_priv->ring;
+	int i, skipped;
+
+	DRM_INFO("\n");
+
+	DRM_INFO("ring contents:\n");
+	DRM_INFO("  head_addr: 0x%08x head: %u tail: %u\n\n",
+		 ring->head_addr, ring->head, ring->tail);
+
+	skipped = 0;
+	for (i = 0; i < ring->size / sizeof(u32); i += 4) {
+		if (i <= MACH64_DUMP_CONTEXT * 4 ||
+		    i >= ring->size / sizeof(u32) - MACH64_DUMP_CONTEXT * 4 ||
+		    (i >= ring->tail - MACH64_DUMP_CONTEXT * 4 &&
+		     i <= ring->tail + MACH64_DUMP_CONTEXT * 4) ||
+		    (i >= ring->head - MACH64_DUMP_CONTEXT * 4 &&
+		     i <= ring->head + MACH64_DUMP_CONTEXT * 4)) {
+			DRM_INFO("  0x%08x:  0x%08x 0x%08x 0x%08x 0x%08x%s%s\n",
+				 (u32)(ring->start_addr + i * sizeof(u32)),
+				 le32_to_cpu(((u32 *) ring->start)[i + 0]),
+				 le32_to_cpu(((u32 *) ring->start)[i + 1]),
+				 le32_to_cpu(((u32 *) ring->start)[i + 2]),
+				 le32_to_cpu(((u32 *) ring->start)[i + 3]),
+				 i == ring->head ? " (head)" : "",
+				 i == ring->tail ? " (tail)" : "");
+			skipped = 0;
+		} else {
+			if (!skipped) {
+				DRM_INFO("  ...\n");
+				skipped = 1;
+			}
+		}
+	}
+
+	DRM_INFO("\n");
+	/* NOTE(review): if head is unsigned, "head >= 0" is always true */
+	if (ring->head >= 0 && ring->head < ring->size / sizeof(u32)) {
+		struct list_head *ptr;
+		u32 addr = le32_to_cpu(((u32 *) ring->start)[ring->head + 1]);
+
+		list_for_each(ptr, &dev_priv->pending) {
+			drm_mach64_freelist_t *entry =
+			    list_entry(ptr, drm_mach64_freelist_t, list);
+			struct drm_buf *buf = entry->buf;
+
+			u32 buf_addr = GETBUFADDR(buf);
+
+			if (buf_addr <= addr && addr < buf_addr + buf->used)
+				mach64_dump_buf_info(dev_priv, buf);
+		}
+	}
+
+	DRM_INFO("\n");
+	DRM_INFO("       BM_GUI_TABLE = 0x%08x\n",
+		 MACH64_READ(MACH64_BM_GUI_TABLE));
+	DRM_INFO("\n");
+	DRM_INFO("BM_FRAME_BUF_OFFSET = 0x%08x\n",
+		 MACH64_READ(MACH64_BM_FRAME_BUF_OFFSET));
+	DRM_INFO(" BM_SYSTEM_MEM_ADDR = 0x%08x\n",
+		 MACH64_READ(MACH64_BM_SYSTEM_MEM_ADDR));
+	DRM_INFO("         BM_COMMAND = 0x%08x\n",
+		 MACH64_READ(MACH64_BM_COMMAND));
+	DRM_INFO("\n");
+	DRM_INFO("          BM_STATUS = 0x%08x\n",
+		 MACH64_READ(MACH64_BM_STATUS));
+	DRM_INFO("           BUS_CNTL = 0x%08x\n",
+		 MACH64_READ(MACH64_BUS_CNTL));
+	DRM_INFO("          FIFO_STAT = 0x%08x\n",
+		 MACH64_READ(MACH64_FIFO_STAT));
+	DRM_INFO("           GUI_STAT = 0x%08x\n",
+		 MACH64_READ(MACH64_GUI_STAT));
+	DRM_INFO("           SRC_CNTL = 0x%08x\n",
+		 MACH64_READ(MACH64_SRC_CNTL));
+}
+
+/*@}*/
+
+
+/*******************************************************************/
+/** \name DMA descriptor ring macros */
+/*@{*/
+
+/**
+ * Add the end mark to the ring's new tail position.
+ *
+ * The bus master engine will keep processing the DMA buffers listed in the ring
+ * until it finds this mark, making it stop.
+ *
+ * \sa mach64_clear_dma_eol()
+ */
+static __inline__ void mach64_set_dma_eol(volatile u32 *addr)
+{
+#if defined(__i386__)
+	int nr = 31;
+
+	/* Taken from include/asm-i386/bitops.h linux header */
+	__asm__ __volatile__("lock;" "btsl %1,%0":"=m"(*addr)
+			     :"Ir"(nr));
+#elif defined(__powerpc__)
+	u32 old;
+	u32 mask = cpu_to_le32(MACH64_DMA_EOL);
+
+	/* Taken from the include/asm-ppc/bitops.h linux header */
+	__asm__ __volatile__("\n\
+1:	lwarx	%0,0,%3 \n\
+	or	%0,%0,%2 \n\
+	stwcx.	%0,0,%3 \n\
+	bne-	1b":"=&r"(old), "=m"(*addr)
+			     :"r"(mask), "r"(addr), "m"(*addr)
+			     :"cc");
+#elif defined(__alpha__)
+	u32 temp;
+	u32 mask = MACH64_DMA_EOL;
+
+	/* Taken from the include/asm-alpha/bitops.h linux header */
+	__asm__ __volatile__("1:	ldl_l %0,%3\n"
+			     "	bis %0,%2,%0\n"
+			     "	stl_c %0,%1\n"
+			     "	beq %0,2f\n"
+			     ".subsection 2\n"
+			     "2:	br 1b\n"
+			     ".previous":"=&r"(temp), "=m"(*addr)
+			     :"Ir"(mask), "m"(*addr));
+#else	/* NOTE(review): plain |= below is an ordinary read-modify-write */
+	u32 mask = cpu_to_le32(MACH64_DMA_EOL);
+
+	*addr |= mask;
+#endif
+}
+
+/**
+ * Remove the end mark from the ring's old tail position.
+ *
+ * It should be called after calling mach64_set_dma_eol() to mark the ring's
+ * new tail position.
+ *
+ * We update the end marks while the bus master engine is in operation. Since
+ * the bus master engine may potentially be reading from the same position
+ * that we write, we must change atomically to avoid having intermediate bad
+ * data.
+ */
+static __inline__ void mach64_clear_dma_eol(volatile u32 *addr)
+{
+#if defined(__i386__)
+	int nr = 31;
+
+	/* Taken from include/asm-i386/bitops.h linux header */
+	__asm__ __volatile__("lock;" "btrl %1,%0":"=m"(*addr)
+			     :"Ir"(nr));
+#elif defined(__powerpc__)
+	u32 old;
+	u32 mask = cpu_to_le32(MACH64_DMA_EOL);
+
+	/* Taken from the include/asm-ppc/bitops.h linux header */
+	__asm__ __volatile__("\n\
+1:	lwarx	%0,0,%3 \n\
+	andc	%0,%0,%2 \n\
+	stwcx.	%0,0,%3 \n\
+	bne-	1b":"=&r"(old), "=m"(*addr)
+			     :"r"(mask), "r"(addr), "m"(*addr)
+			     :"cc");
+#elif defined(__alpha__)
+	u32 temp;
+	u32 mask = ~MACH64_DMA_EOL;
+
+	/* Taken from the include/asm-alpha/bitops.h linux header */
+	__asm__ __volatile__("1:	ldl_l %0,%3\n"
+			     "	and %0,%2,%0\n"
+			     "	stl_c %0,%1\n"
+			     "	beq %0,2f\n"
+			     ".subsection 2\n"
+			     "2:	br 1b\n"
+			     ".previous":"=&r"(temp), "=m"(*addr)
+			     :"Ir"(mask), "m"(*addr));
+#else	/* NOTE(review): plain &= below is an ordinary read-modify-write */
+	u32 mask = cpu_to_le32(~MACH64_DMA_EOL);
+
+	*addr &= mask;
+#endif
+}
+
+#define RING_LOCALS							\
+	int _ring_tail, _ring_write; unsigned int _ring_mask; volatile u32 *_ring
+
+#define RING_WRITE_OFS  _ring_write	/* dword index of the next ring write */
+
+#define BEGIN_RING(n) /* reserve n dwords; returns from caller on timeout */ \
+	do {								\
+		if (MACH64_VERBOSE) {					\
+			DRM_INFO( "BEGIN_RING( %d ) \n",		\
+				  (n) );				\
+		}							\
+		if (dev_priv->ring.space <= (n) * sizeof(u32)) {	\
+			int ret;					\
+			if ((ret = mach64_wait_ring( dev_priv, (n) * sizeof(u32))) < 0 ) { \
+				DRM_ERROR( "wait_ring failed, resetting engine\n"); \
+				mach64_dump_engine_info( dev_priv );	\
+				mach64_do_engine_reset( dev_priv );	\
+				return ret;				\
+			}						\
+		}							\
+		dev_priv->ring.space -= (n) * sizeof(u32);		\
+		_ring = (u32 *) dev_priv->ring.start;			\
+		_ring_tail = _ring_write = dev_priv->ring.tail;		\
+		_ring_mask = dev_priv->ring.tail_mask;			\
+	} while (0)
+
+#define OUT_RING( x ) /* store x as little-endian, then wrap the offset */ \
+do {								\
+	if (MACH64_VERBOSE) {					\
+		DRM_INFO( "   OUT_RING( 0x%08x ) at 0x%x\n",	\
+			   (unsigned int)(x), _ring_write );	\
+	}							\
+	_ring[_ring_write++] = cpu_to_le32( x );		\
+	_ring_write &= _ring_mask;				\
+} while (0)
+
+#define ADVANCE_RING() /* clear the previous end mark, publish new tail */ \
+do {									\
+	if (MACH64_VERBOSE) {						\
+		DRM_INFO( "ADVANCE_RING() wr=0x%06x tail=0x%06x\n",	\
+			  _ring_write, _ring_tail );			\
+	}								\
+	DRM_MEMORYBARRIER();						\
+	mach64_clear_dma_eol( &_ring[(_ring_tail - 2) & _ring_mask] );	\
+	DRM_MEMORYBARRIER();						\
+	dev_priv->ring.tail = _ring_write;				\
+	mach64_ring_tick( dev_priv, &(dev_priv)->ring );		\
+} while (0)
+
+/**
+ * Queue a DMA buffer of register writes into the ring, using one descriptor
+ * per MACH64_DMA_CHUNKSIZE bytes, the last one carrying the end mark.
+ */
+int mach64_add_buf_to_ring(drm_mach64_private_t *dev_priv,
+                           drm_mach64_freelist_t *entry)
+{
+	int bytes, pages, remainder;
+	u32 address, page;
+	int i;
+	struct drm_buf *buf = entry->buf;
+	RING_LOCALS;
+
+	bytes = buf->used;
+	address = GETBUFADDR( buf );
+	pages = (bytes + MACH64_DMA_CHUNKSIZE - 1) / MACH64_DMA_CHUNKSIZE;
+
+	BEGIN_RING( pages * 4 );
+
+	for ( i = 0 ; i < pages-1 ; i++ ) {
+		page = address + i * MACH64_DMA_CHUNKSIZE;
+		OUT_RING( MACH64_APERTURE_OFFSET + MACH64_BM_ADDR );
+		OUT_RING( page );
+		OUT_RING( MACH64_DMA_CHUNKSIZE | MACH64_DMA_HOLD_OFFSET );
+		OUT_RING( 0 );
+	}
+
+	/* generate the final descriptor for any remaining commands in this buffer */
+	page = address + i * MACH64_DMA_CHUNKSIZE;
+	remainder = bytes - i * MACH64_DMA_CHUNKSIZE;
+
+	/* Save dword offset of last descriptor for this buffer.
+	 * This is needed to check for completion of the buffer in freelist_get
+	 */
+	entry->ring_ofs = RING_WRITE_OFS;
+
+	OUT_RING( MACH64_APERTURE_OFFSET + MACH64_BM_ADDR );
+	OUT_RING( page );
+	OUT_RING( remainder | MACH64_DMA_HOLD_OFFSET | MACH64_DMA_EOL );
+	OUT_RING( 0 );
+
+	ADVANCE_RING();
+
+	return 0;
+}
+
+/**
+ * Queue DMA buffer controlling host data transfers (e.g., blit).
+ * Almost identical to mach64_add_buf_to_ring(), with an extra first descriptor
+ * for the leading MACH64_HOSTDATA_BLIT_OFFSET bytes of the buffer.
+ */
+int mach64_add_hostdata_buf_to_ring(drm_mach64_private_t *dev_priv,
+                                    drm_mach64_freelist_t *entry)
+{
+	int bytes, pages, remainder;
+	u32 address, page;
+	int i;
+	struct drm_buf *buf = entry->buf;
+	RING_LOCALS;
+
+	bytes = buf->used - MACH64_HOSTDATA_BLIT_OFFSET;
+	pages = (bytes + MACH64_DMA_CHUNKSIZE - 1) / MACH64_DMA_CHUNKSIZE;
+	address = GETBUFADDR( buf );
+
+	BEGIN_RING( 4 + pages * 4 );
+
+	OUT_RING( MACH64_APERTURE_OFFSET + MACH64_BM_ADDR );
+	OUT_RING( address );
+	OUT_RING( MACH64_HOSTDATA_BLIT_OFFSET | MACH64_DMA_HOLD_OFFSET );
+	OUT_RING( 0 );
+	address += MACH64_HOSTDATA_BLIT_OFFSET;
+
+	for ( i = 0 ; i < pages-1 ; i++ ) {
+		page = address + i * MACH64_DMA_CHUNKSIZE;
+		OUT_RING( MACH64_APERTURE_OFFSET + MACH64_BM_HOSTDATA );
+		OUT_RING( page );
+		OUT_RING( MACH64_DMA_CHUNKSIZE | MACH64_DMA_HOLD_OFFSET );
+		OUT_RING( 0 );
+	}
+
+	/* generate the final descriptor for any remaining commands in this buffer */
+	page = address + i * MACH64_DMA_CHUNKSIZE;
+	remainder = bytes - i * MACH64_DMA_CHUNKSIZE;
+
+	/* Save dword offset of last descriptor for this buffer.
+	 * This is needed to check for completion of the buffer in freelist_get
+	 */
+	entry->ring_ofs = RING_WRITE_OFS;
+
+	OUT_RING( MACH64_APERTURE_OFFSET + MACH64_BM_HOSTDATA );
+	OUT_RING( page );
+	OUT_RING( remainder | MACH64_DMA_HOLD_OFFSET | MACH64_DMA_EOL );
+	OUT_RING( 0 );
+
+	ADVANCE_RING();
+
+	return 0;
+}
+
+/*@}*/
+
+
+/*******************************************************************/
+/** \name DMA test and initialization */
+/*@{*/
+
+/**
+ * Perform a simple DMA operation using the pattern registers to test whether
+ * DMA works.
+ *
+ * \return zero if successful, -ENOMEM if the test buffer cannot be allocated,
+ * -EIO if the pattern registers cannot be initialized, the error from
+ * mach64_do_wait_for_idle() if the engine locks up, or -1 if the registers
+ * do not hold the expected values after the transfer.
+ *
+ * \note This function was the testbed for many experiments regarding Mach64
+ * DMA operation. It is left here since it is so tricky to get DMA operating
+ * properly in some architectures and hardware.
+ */
+static int mach64_bm_dma_test(struct drm_device * dev)
+{
+	drm_mach64_private_t *dev_priv = dev->dev_private;
+	drm_dma_handle_t *cpu_addr_dmah;
+	u32 data_addr;
+	u32 *table, *data;
+	u32 expected[2];
+	u32 src_cntl, pat_reg0, pat_reg1;
+	int i, count, ret;
+
+	DRM_DEBUG("\n");
+
+	table = (u32 *) dev_priv->ring.start;
+
+	/* FIXME: get a dma buffer from the freelist here */
+	DRM_DEBUG("Allocating data memory ...\n");
+#ifdef __FreeBSD__
+	DRM_UNLOCK();
+#endif
+	cpu_addr_dmah =
+	    drm_pci_alloc(dev, 0x1000, 0x1000, 0xfffffffful);
+#ifdef __FreeBSD__
+	DRM_LOCK();
+#endif
+	if (!cpu_addr_dmah) {
+		DRM_INFO("data-memory allocation failed!\n");
+		return -ENOMEM;
+	}
+	data = (u32 *) cpu_addr_dmah->vaddr;
+	data_addr = (u32) cpu_addr_dmah->busaddr;
+
+	/* Save the X server's value for SRC_CNTL and restore it
+	 * in case our test fails.  This prevents the X server
+	 * from disabling its cache for this register.  The pattern
+	 * registers are saved and restored as well.
+	 */
+	src_cntl = MACH64_READ(MACH64_SRC_CNTL);
+	pat_reg0 = MACH64_READ(MACH64_PAT_REG0);
+	pat_reg1 = MACH64_READ(MACH64_PAT_REG1);
+
+	mach64_do_wait_for_fifo(dev_priv, 3);
+
+	MACH64_WRITE(MACH64_SRC_CNTL, 0);
+	MACH64_WRITE(MACH64_PAT_REG0, 0x11111111);
+	MACH64_WRITE(MACH64_PAT_REG1, 0x11111111);
+
+	mach64_do_wait_for_idle(dev_priv);
+
+	for (i = 0; i < 2; i++) {
+		u32 reg;
+		reg = MACH64_READ((MACH64_PAT_REG0 + i * 4));
+		DRM_DEBUG("(Before DMA Transfer) reg %d = 0x%08x\n", i, reg);
+		if (reg != 0x11111111) {
+			DRM_INFO("Error initializing test registers\n");
+			ret = -EIO;
+			goto err_reset;
+		}
+	}
+
+	/* fill up a buffer with sets of 2 consecutive writes starting with PAT_REG0 */
+	count = 0;
+
+	data[count++] = cpu_to_le32(DMAREG(MACH64_PAT_REG0) | (1 << 16));
+	data[count++] = expected[0] = 0x22222222;
+	data[count++] = expected[1] = 0xaaaaaaaa;
+
+	while (count < 1020) {
+		data[count++] =
+		    cpu_to_le32(DMAREG(MACH64_PAT_REG0) | (1 << 16));
+		data[count++] = 0x22222222;
+		data[count++] = 0xaaaaaaaa;
+	}
+	data[count++] = cpu_to_le32(DMAREG(MACH64_SRC_CNTL) | (0 << 16));
+	data[count++] = 0;
+
+	DRM_DEBUG("Preparing table ...\n");
+	table[MACH64_DMA_FRAME_BUF_OFFSET] = cpu_to_le32(MACH64_BM_ADDR +
+							 MACH64_APERTURE_OFFSET);
+	table[MACH64_DMA_SYS_MEM_ADDR] = cpu_to_le32(data_addr);
+	table[MACH64_DMA_COMMAND] = cpu_to_le32(count * sizeof(u32)
+						| MACH64_DMA_HOLD_OFFSET
+						| MACH64_DMA_EOL);
+	table[MACH64_DMA_RESERVED] = 0;
+
+	DRM_DEBUG("table[0] = 0x%08x\n", table[0]);
+	DRM_DEBUG("table[1] = 0x%08x\n", table[1]);
+	DRM_DEBUG("table[2] = 0x%08x\n", table[2]);
+	DRM_DEBUG("table[3] = 0x%08x\n", table[3]);
+
+	for (i = 0; i < 6; i++) {
+		DRM_DEBUG(" data[%d] = 0x%08x\n", i, data[i]);
+	}
+	DRM_DEBUG(" ...\n");
+	for (i = count - 5; i < count; i++) {
+		DRM_DEBUG(" data[%d] = 0x%08x\n", i, data[i]);
+	}
+
+	DRM_MEMORYBARRIER();
+
+	DRM_DEBUG("waiting for idle...\n");
+	ret = mach64_do_wait_for_idle(dev_priv);
+	if (ret) {
+		DRM_INFO("mach64_do_wait_for_idle failed (result=%d)\n", ret);
+		goto err_reset;
+	}
+	DRM_DEBUG("waiting for idle...done\n");
+
+	DRM_DEBUG("BUS_CNTL = 0x%08x\n", MACH64_READ(MACH64_BUS_CNTL));
+	DRM_DEBUG("SRC_CNTL = 0x%08x\n", MACH64_READ(MACH64_SRC_CNTL));
+	DRM_DEBUG("\n");
+	DRM_DEBUG("data bus addr = 0x%08x\n", data_addr);
+	DRM_DEBUG("table bus addr = 0x%08x\n", dev_priv->ring.start_addr);
+
+	DRM_DEBUG("starting DMA transfer...\n");
+	MACH64_WRITE(MACH64_BM_GUI_TABLE_CMD,
+		     dev_priv->ring.start_addr | MACH64_CIRCULAR_BUF_SIZE_16KB);
+
+	MACH64_WRITE(MACH64_SRC_CNTL,
+		     MACH64_SRC_BM_ENABLE | MACH64_SRC_BM_SYNC |
+		     MACH64_SRC_BM_OP_SYSTEM_TO_REG);
+
+	/* Kick off the transfer */
+	DRM_DEBUG("starting DMA transfer... done.\n");
+	MACH64_WRITE(MACH64_DST_HEIGHT_WIDTH, 0);
+
+	DRM_DEBUG("waiting for idle...\n");
+
+	ret = mach64_do_wait_for_idle(dev_priv);
+	if (ret) {
+		/* engine locked up, dump register state and reset */
+		DRM_INFO("mach64_do_wait_for_idle failed (result=%d)\n", ret);
+		mach64_dump_engine_info(dev_priv);
+		goto err_reset;
+	}
+
+	DRM_DEBUG("waiting for idle...done\n");
+
+	/* restore SRC_CNTL */
+	mach64_do_wait_for_fifo(dev_priv, 1);
+	MACH64_WRITE(MACH64_SRC_CNTL, src_cntl);
+
+	ret = 0;
+
+	/* Check register values to see if the GUI master operation succeeded */
+	for (i = 0; i < 2; i++) {
+		u32 reg;
+		reg = MACH64_READ((MACH64_PAT_REG0 + i * 4));
+		DRM_DEBUG("(After DMA Transfer) reg %d = 0x%08x\n", i, reg);
+		if (reg != expected[i]) {
+			ret = -1;
+		}
+	}
+
+	/* restore pattern registers */
+	mach64_do_wait_for_fifo(dev_priv, 2);
+	MACH64_WRITE(MACH64_PAT_REG0, pat_reg0);
+	MACH64_WRITE(MACH64_PAT_REG1, pat_reg1);
+
+	DRM_DEBUG("freeing data buffer memory.\n");
+	drm_pci_free(dev, cpu_addr_dmah);
+	DRM_DEBUG("returning ...\n");
+
+	return ret;
+
+err_reset:
+	/* Common failure path: reset the engine, then put back the saved
+	 * SRC_CNTL and pattern register values before releasing the buffer.
+	 */
+	DRM_INFO("resetting engine ...\n");
+	mach64_do_engine_reset(dev_priv);
+	mach64_do_wait_for_fifo(dev_priv, 3);
+	MACH64_WRITE(MACH64_SRC_CNTL, src_cntl);
+	MACH64_WRITE(MACH64_PAT_REG0, pat_reg0);
+	MACH64_WRITE(MACH64_PAT_REG1, pat_reg1);
+	DRM_INFO("freeing data buffer memory.\n");
+	drm_pci_free(dev, cpu_addr_dmah);
+	return ret;
+}
+
+/**
+ * DMA initialization ioctl backend: builds dev_priv from \p init, probes DMA
+ * and fills the freelist.  Returns 0 or a negative errno (state torn down).
+ */
+static int mach64_do_dma_init(struct drm_device * dev, drm_mach64_init_t * init)
+{
+	drm_mach64_private_t *dev_priv;
+	u32 tmp;
+	int i, ret;
+
+	DRM_DEBUG("\n");
+
+	/* A second INIT would overwrite, and so leak, the live private state */
+	if (dev->dev_private) {
+		DRM_DEBUG("called when already initialized\n");
+		return -EINVAL;
+	}
+
+	dev_priv = drm_alloc(sizeof(drm_mach64_private_t), DRM_MEM_DRIVER);
+	if (dev_priv == NULL)
+		return -ENOMEM;
+
+	memset(dev_priv, 0, sizeof(drm_mach64_private_t));
+
+	dev_priv->is_pci = init->is_pci;
+
+	dev_priv->fb_bpp = init->fb_bpp;
+	dev_priv->front_offset = init->front_offset;
+	dev_priv->front_pitch = init->front_pitch;
+	dev_priv->back_offset = init->back_offset;
+	dev_priv->back_pitch = init->back_pitch;
+
+	dev_priv->depth_bpp = init->depth_bpp;
+	dev_priv->depth_offset = init->depth_offset;
+	dev_priv->depth_pitch = init->depth_pitch;
+
+	dev_priv->front_offset_pitch = (((dev_priv->front_pitch / 8) << 22) |
+					(dev_priv->front_offset >> 3));
+	dev_priv->back_offset_pitch = (((dev_priv->back_pitch / 8) << 22) |
+				       (dev_priv->back_offset >> 3));
+	dev_priv->depth_offset_pitch = (((dev_priv->depth_pitch / 8) << 22) |
+					(dev_priv->depth_offset >> 3));
+
+	dev_priv->usec_timeout = 1000000;
+
+	/* Set up the freelist, placeholder list and pending list */
+	INIT_LIST_HEAD(&dev_priv->free_list);
+	INIT_LIST_HEAD(&dev_priv->placeholders);
+	INIT_LIST_HEAD(&dev_priv->pending);
+
+	dev_priv->sarea = drm_getsarea(dev);
+	if (!dev_priv->sarea) {
+		DRM_ERROR("can not find sarea!\n");
+		ret = -EINVAL;
+		goto err_cleanup;
+	}
+	dev_priv->fb = drm_core_findmap(dev, init->fb_offset);
+	if (!dev_priv->fb) {
+		DRM_ERROR("can not find frame buffer map!\n");
+		ret = -EINVAL;
+		goto err_cleanup;
+	}
+	dev_priv->mmio = drm_core_findmap(dev, init->mmio_offset);
+	if (!dev_priv->mmio) {
+		DRM_ERROR("can not find mmio map!\n");
+		ret = -EINVAL;
+		goto err_cleanup;
+	}
+
+	dev_priv->ring_map = drm_core_findmap(dev, init->ring_offset);
+	if (!dev_priv->ring_map) {
+		DRM_ERROR("can not find ring map!\n");
+		ret = -EINVAL;
+		goto err_cleanup;
+	}
+
+	dev_priv->sarea_priv = (drm_mach64_sarea_t *)
+	    ((u8 *) dev_priv->sarea->handle + init->sarea_priv_offset);
+
+	if (!dev_priv->is_pci) {
+		drm_core_ioremap(dev_priv->ring_map, dev);
+		if (!dev_priv->ring_map->handle) {
+			DRM_ERROR("can not ioremap virtual address for"
+				  " descriptor ring\n");
+			ret = -ENOMEM;
+			goto err_cleanup;
+		}
+		dev->agp_buffer_token = init->buffers_offset;
+		dev->agp_buffer_map =
+		    drm_core_findmap(dev, init->buffers_offset);
+		if (!dev->agp_buffer_map) {
+			DRM_ERROR("can not find dma buffer map!\n");
+			ret = -EINVAL;
+			goto err_cleanup;
+		}
+		/* there might be a nicer way to do this -
+		   dev isn't passed all the way through the mach64 - DA */
+		dev_priv->dev_buffers = dev->agp_buffer_map;
+
+		drm_core_ioremap(dev->agp_buffer_map, dev);
+		if (!dev->agp_buffer_map->handle) {
+			DRM_ERROR("can not ioremap virtual address for"
+				  " dma buffer\n");
+			ret = -ENOMEM;
+			goto err_cleanup;
+		}
+		dev_priv->agp_textures =
+		    drm_core_findmap(dev, init->agp_textures_offset);
+		if (!dev_priv->agp_textures) {
+			DRM_ERROR("can not find agp texture region!\n");
+			ret = -EINVAL;
+			goto err_cleanup;
+		}
+	}
+
+	dev->dev_private = (void *)dev_priv;
+
+	dev_priv->driver_mode = init->dma_mode;
+
+	/* changing the FIFO size from the default causes problems with DMA */
+	tmp = MACH64_READ(MACH64_GUI_CNTL);
+	if ((tmp & MACH64_CMDFIFO_SIZE_MASK) != MACH64_CMDFIFO_SIZE_128) {
+		DRM_INFO("Setting FIFO size to 128 entries\n");
+		/* FIFO must be empty to change the FIFO depth */
+		if ((ret = mach64_do_wait_for_idle(dev_priv))) {
+			DRM_ERROR
+			    ("wait for idle failed before changing FIFO depth!\n");
+			goto err_cleanup;
+		}
+		MACH64_WRITE(MACH64_GUI_CNTL, ((tmp & ~MACH64_CMDFIFO_SIZE_MASK)
+					       | MACH64_CMDFIFO_SIZE_128));
+		/* need to read GUI_STAT for proper sync according to docs */
+		if ((ret = mach64_do_wait_for_idle(dev_priv))) {
+			DRM_ERROR
+			    ("wait for idle failed when changing FIFO depth!\n");
+			goto err_cleanup;
+		}
+	}
+
+	dev_priv->ring.size = 0x4000;	/* 16KB */
+	dev_priv->ring.start = dev_priv->ring_map->handle;
+	dev_priv->ring.start_addr = (u32) dev_priv->ring_map->offset;
+
+	memset(dev_priv->ring.start, 0, dev_priv->ring.size);
+	DRM_INFO("descriptor ring: cpu addr %p, bus addr: 0x%08x\n",
+		 dev_priv->ring.start, dev_priv->ring.start_addr);
+
+	ret = 0;
+	if (dev_priv->driver_mode != MACH64_MODE_MMIO) {
+
+		/* enable block 1 registers and bus mastering */
+		MACH64_WRITE(MACH64_BUS_CNTL, ((MACH64_READ(MACH64_BUS_CNTL)
+						| MACH64_BUS_EXT_REG_EN)
+					       & ~MACH64_BUS_MASTER_DIS));
+
+		/* try a DMA GUI-mastering pass and fall back to MMIO if it fails */
+		DRM_DEBUG("Starting DMA test...\n");
+		if ((ret = mach64_bm_dma_test(dev))) {
+			dev_priv->driver_mode = MACH64_MODE_MMIO;
+		}
+	}
+
+	switch (dev_priv->driver_mode) {
+	case MACH64_MODE_MMIO:
+		MACH64_WRITE(MACH64_BUS_CNTL, (MACH64_READ(MACH64_BUS_CNTL)
+					       | MACH64_BUS_EXT_REG_EN
+					       | MACH64_BUS_MASTER_DIS));
+		if (init->dma_mode == MACH64_MODE_MMIO)
+			DRM_INFO("Forcing pseudo-DMA mode\n");
+		else
+			DRM_INFO
+			    ("DMA test failed (ret=%d), using pseudo-DMA mode\n",
+			     ret);
+		break;
+	case MACH64_MODE_DMA_SYNC:
+		DRM_INFO("DMA test succeeded, using synchronous DMA mode\n");
+		break;
+	case MACH64_MODE_DMA_ASYNC:
+	default:
+		DRM_INFO("DMA test succeeded, using asynchronous DMA mode\n");
+	}
+
+	dev_priv->ring_running = 0;
+
+	/* setup offsets for physical address of table start and end */
+	dev_priv->ring.head_addr = dev_priv->ring.start_addr;
+	dev_priv->ring.head = dev_priv->ring.tail = 0;
+	dev_priv->ring.tail_mask = (dev_priv->ring.size / sizeof(u32)) - 1;
+	dev_priv->ring.space = dev_priv->ring.size;
+
+	/* setup physical address and size of descriptor table */
+	mach64_do_wait_for_fifo(dev_priv, 1);
+	MACH64_WRITE(MACH64_BM_GUI_TABLE_CMD,
+		     (dev_priv->ring.
+		      head_addr | MACH64_CIRCULAR_BUF_SIZE_16KB));
+
+	/* init frame counter */
+	dev_priv->sarea_priv->frames_queued = 0;
+	for (i = 0; i < MACH64_MAX_QUEUED_FRAMES; i++) {
+		dev_priv->frame_ofs[i] = ~0;	/* All ones indicates placeholder */
+	}
+
+	/* Allocate the DMA buffer freelist */
+	if ((ret = mach64_init_freelist(dev))) {
+		DRM_ERROR("Freelist allocation failed\n");
+		goto err_cleanup;
+	}
+
+	return 0;
+
+err_cleanup:
+	dev->dev_private = (void *)dev_priv;
+	mach64_do_cleanup_dma(dev);
+	return ret;
+}
+
+/*******************************************************************/
+/** MMIO Pseudo-DMA (intended primarily for debugging, not performance): feeds
+ * queued ring descriptors by MMIO; 0, wait error, or -EINVAL (buffer unknown) */
+
+int mach64_do_dispatch_pseudo_dma(drm_mach64_private_t *dev_priv)
+{
+	drm_mach64_descriptor_ring_t *ring = &dev_priv->ring;
+	volatile u32 *ring_read;
+	struct list_head *ptr;
+	drm_mach64_freelist_t *entry;
+	struct drm_buf *buf = NULL;
+	u32 *buf_ptr;
+	u32 used, reg, target;
+	int fifo, count, found, ret, no_idle_wait;
+
+	fifo = count = reg = no_idle_wait = 0;
+	target = MACH64_BM_ADDR;	/* target of the previous descriptor */
+
+	if ((ret = mach64_do_wait_for_idle(dev_priv)) < 0) {
+		DRM_INFO("idle failed before pseudo-dma dispatch, resetting engine\n");
+		mach64_dump_engine_info(dev_priv);
+		mach64_do_engine_reset(dev_priv);
+		return ret;
+	}
+
+	ring_read = (u32 *) ring->start;
+
+	while (ring->tail != ring->head) {
+		u32 buf_addr, new_target, offset;
+		u32 bytes, remaining, head, eol;
+
+		head = ring->head;	/* dword index into the ring */
+
+		new_target =
+		    le32_to_cpu(ring_read[head++]) - MACH64_APERTURE_OFFSET;
+		buf_addr = le32_to_cpu(ring_read[head++]);
+		eol = le32_to_cpu(ring_read[head]) & MACH64_DMA_EOL;
+		bytes = le32_to_cpu(ring_read[head++])
+		    & ~(MACH64_DMA_HOLD_OFFSET | MACH64_DMA_EOL);
+		head++;		/* 4th descriptor dword is not read here */
+		head &= ring->tail_mask;	/* wrap around the ring */
+
+		/* can't wait for idle between a blit setup descriptor
+		 * and a HOSTDATA descriptor or the engine will lock
+		 */
+		if (new_target == MACH64_BM_HOSTDATA
+		    && target == MACH64_BM_ADDR)
+			no_idle_wait = 1;	/* NOTE(review): never cleared again on success */
+
+		target = new_target;
+
+		found = 0;
+		offset = 0;
+		list_for_each(ptr, &dev_priv->pending) {
+			entry = list_entry(ptr, drm_mach64_freelist_t, list);
+			buf = entry->buf;
+			offset = buf_addr - GETBUFADDR(buf);	/* u32: wraps to a huge value if below buf */
+			if (offset >= 0 && offset < MACH64_BUFFER_SIZE) {
+				found = 1;
+				break;
+			}
+		}
+
+		if (!found || buf == NULL) {
+			DRM_ERROR
+			    ("Couldn't find pending buffer: head: %u tail: %u buf_addr: 0x%08x %s\n",
+			     head, ring->tail, buf_addr, (eol ? "eol" : ""));
+			mach64_dump_ring_info(dev_priv);
+			mach64_do_engine_reset(dev_priv);
+			return -EINVAL;
+		}
+
+		/* Hand feed the buffer to the card via MMIO, waiting for the fifo
+		 * every 16 writes
+		 */
+		DRM_DEBUG("target: (0x%08x) %s\n", target,
+			  (target ==
+			   MACH64_BM_HOSTDATA ? "BM_HOSTDATA" : "BM_ADDR"));
+		DRM_DEBUG("offset: %u bytes: %u used: %u\n", offset, bytes,
+			  buf->used);
+
+		remaining = (buf->used - offset) >> 2;	/* dwords remaining in buffer */
+		used = bytes >> 2;	/* dwords in buffer for this descriptor */
+		buf_ptr = (u32 *) ((char *)GETBUFPTR(buf) + offset);
+
+		while (used) {
+
+			if (count == 0) {	/* start a new run of register writes */
+				if (target == MACH64_BM_HOSTDATA) {
+					reg = DMAREG(MACH64_HOST_DATA0);
+					count =
+					    (remaining > 16) ? 16 : remaining;
+					fifo = 0;
+				} else {
+					reg = le32_to_cpu(*buf_ptr++);
+					used--;
+					count = (reg >> 16) + 1;	/* high 16 bits hold count - 1 */
+				}
+
+				reg = reg & 0xffff;	/* low 16 bits select the register */
+				reg = MMSELECT(reg);
+			}
+			while (count && used) {
+				if (!fifo) {
+					if (no_idle_wait) {
+						if ((ret =
+						     mach64_do_wait_for_fifo
+						     (dev_priv, 16)) < 0) {
+							no_idle_wait = 0;	/* NOTE(review): dead store, local is lost on return */
+							return ret;
+						}
+					} else {
+						if ((ret =
+						     mach64_do_wait_for_idle
+						     (dev_priv)) < 0) {
+							return ret;
+						}
+					}
+					fifo = 16;	/* writes allowed before the next wait */
+				}
+				--fifo;
+				MACH64_WRITE(reg, le32_to_cpu(*buf_ptr++));
+				used--;
+				remaining--;
+
+				reg += 4;
+				count--;
+			}
+		}
+		ring->head = head;
+		ring->head_addr = ring->start_addr + (ring->head * sizeof(u32));
+		ring->space += (4 * sizeof(u32));	/* one descriptor = 4 dwords */
+	}
+
+	if ((ret = mach64_do_wait_for_idle(dev_priv)) < 0) {
+		return ret;
+	}
+	MACH64_WRITE(MACH64_BM_GUI_TABLE_CMD,
+		     ring->head_addr | MACH64_CIRCULAR_BUF_SIZE_16KB);
+
+	DRM_DEBUG("completed\n");
+	return 0;
+}
+
+/*@}*/
+
+
+/*******************************************************************/
+/** \name DMA cleanup */
+/*@{*/
+
+int mach64_do_cleanup_dma(struct drm_device * dev)
+{
+	drm_mach64_private_t *priv;
+
+	DRM_DEBUG("\n");
+
+	/* Userspace may never have issued the uninstall ioctl, so turn the
+	 * interrupt off here: once the private data has been freed it is
+	 * too late.
+	 */
+	if (dev->irq)
+		drm_irq_uninstall(dev);
+
+	priv = dev->dev_private;
+	if (!priv)
+		return 0;	/* nothing was ever set up */
+
+	/* Only the AGP (non-PCI) setup ioremaps the ring and DMA buffers. */
+	if (!priv->is_pci && priv->ring_map)
+		drm_core_ioremapfree(priv->ring_map, dev);
+	if (!priv->is_pci && dev->agp_buffer_map) {
+		drm_core_ioremapfree(dev->agp_buffer_map, dev);
+		dev->agp_buffer_map = NULL;
+	}
+
+	/* The list heads live inside priv, so drain them before freeing it. */
+	mach64_destroy_freelist(dev);
+
+	drm_free(priv, sizeof(drm_mach64_private_t), DRM_MEM_DRIVER);
+	dev->dev_private = NULL;
+
+	return 0;
+}
+
+/*@}*/
+
+
+/*******************************************************************/
+/** \name IOCTL handlers */
+/*@{*/
+
+int mach64_dma_init(struct drm_device *dev, void *data,
+		    struct drm_file *file_priv)
+{
+	drm_mach64_init_t *req = data;
+
+	DRM_DEBUG("\n");
+
+	LOCK_TEST_WITH_RETURN(dev, file_priv);
+
+	/* INIT ioctl: the func field picks between bringing DMA up and
+	 * tearing it down again. */
+	if (req->func == DRM_MACH64_INIT_DMA)
+		return mach64_do_dma_init(dev, req);
+	if (req->func == DRM_MACH64_CLEANUP_DMA)
+		return mach64_do_cleanup_dma(dev);
+
+	return -EINVAL;		/* unknown sub-function */
+}
+
+int mach64_dma_idle(struct drm_device *dev, void *data,
+		    struct drm_file *file_priv)
+{
+	drm_mach64_private_t *dev_priv = dev->dev_private;
+
+	DRM_DEBUG("\n");
+	LOCK_TEST_WITH_RETURN(dev, file_priv);
+	if (!dev_priv)		/* IDLE ioctl issued before DMA init / after cleanup */
+		return -EINVAL;
+	return mach64_do_dma_idle(dev_priv);
+}
+
+int mach64_dma_flush(struct drm_device *dev, void *data,
+		     struct drm_file *file_priv)
+{
+	drm_mach64_private_t *dev_priv = dev->dev_private;
+
+	DRM_DEBUG("\n");
+	LOCK_TEST_WITH_RETURN(dev, file_priv);
+	if (!dev_priv)		/* FLUSH ioctl issued before DMA init / after cleanup */
+		return -EINVAL;
+	return mach64_do_dma_flush(dev_priv);
+}
+
+int mach64_engine_reset(struct drm_device *dev, void *data,
+			struct drm_file *file_priv)
+{
+	drm_mach64_private_t *dev_priv = dev->dev_private;
+
+	DRM_DEBUG("\n");
+	LOCK_TEST_WITH_RETURN(dev, file_priv);
+	if (!dev_priv)		/* RESET ioctl issued before DMA init / after cleanup */
+		return -EINVAL;
+	return mach64_do_engine_reset(dev_priv);
+}
+
+/*@}*/
+
+
+/*******************************************************************/
+/** \name Freelist management */
+/*@{*/
+
+int mach64_init_freelist(struct drm_device * dev)
+{
+	drm_mach64_private_t *priv = dev->dev_private;
+	struct drm_device_dma *dma = dev->dma;
+	int n;
+
+	DRM_DEBUG("adding %d buffers to freelist\n", dma->buf_count);
+
+	/* One zeroed tracking node per DMA buffer, queued in buflist order.
+	 * Nodes queued before a failed allocation stay on the free list.
+	 */
+	for (n = 0; n < dma->buf_count; n++) {
+		drm_mach64_freelist_t *node;
+
+		node = drm_alloc(sizeof(*node), DRM_MEM_BUFLISTS);
+		if (node == NULL)
+			return -ENOMEM;
+		memset(node, 0, sizeof(*node));
+		node->buf = dma->buflist[n];
+		list_add_tail(&node->list, &priv->free_list);
+	}
+
+	return 0;
+}
+
+void mach64_destroy_freelist(struct drm_device * dev)
+{
+	drm_mach64_private_t *priv = dev->dev_private;
+	struct list_head *heads[3];
+	struct list_head *pos, *next;
+	int n;
+
+	DRM_DEBUG("\n");
+
+	/* Drain every list that can hold a tracking node, in this order:
+	 * pending, placeholders, free. */
+	heads[0] = &priv->pending;
+	heads[1] = &priv->placeholders;
+	heads[2] = &priv->free_list;
+
+	for (n = 0; n < 3; n++) {
+		/* _safe walk: each node is unlinked and freed as we go */
+		list_for_each_safe(pos, next, heads[n]) {
+			drm_mach64_freelist_t *node =
+			    list_entry(pos, drm_mach64_freelist_t, list);
+
+			list_del(pos);
+			drm_free(node, sizeof(*node), DRM_MEM_BUFLISTS);
+		}
+	}
+}
+
+/* IMPORTANT: This function should only be called when the engine is idle or locked up,
+ * as it assumes all buffers in the pending list have been completed by the hardware.
+ */
+int mach64_do_release_used_buffers(drm_mach64_private_t *dev_priv)
+{
+	struct list_head *pos, *next;
+	int released = 0;
+
+	/* Nothing queued: leave quietly, without the debug message below. */
+	if (list_empty(&dev_priv->pending))
+		return 0;
+
+	list_for_each_safe(pos, next, &dev_priv->pending) {
+		drm_mach64_freelist_t *node =
+		    list_entry(pos, drm_mach64_freelist_t, list);
+
+		/* Entries without the discard flag stay on the pending list. */
+		if (!node->discard)
+			continue;
+		node->buf->pending = 0;
+		list_del(pos);
+		list_add_tail(pos, &dev_priv->free_list);
+		released++;
+	}
+
+	DRM_DEBUG("released %d buffers from pending list\n", released);
+
+	return 0;
+}
+
+static int mach64_do_reclaim_completed(drm_mach64_private_t *dev_priv)
+{
+	drm_mach64_descriptor_ring_t *ring = &dev_priv->ring;
+	struct list_head *ptr;
+	struct list_head *tmp;
+	drm_mach64_freelist_t *entry;
+	u32 head, tail, ofs;
+	/* Returns 0 when the free list now has a buffer, 1 when none completed, -1 on error */
+	mach64_ring_tick(dev_priv, ring);
+	head = ring->head;
+	tail = ring->tail;
+
+	if (head == tail) {	/* ring is empty */
+#if MACH64_EXTRA_CHECKING
+		if (MACH64_READ(MACH64_GUI_STAT) & MACH64_GUI_ACTIVE) {
+			DRM_ERROR("Empty ring with non-idle engine!\n");
+			mach64_dump_ring_info(dev_priv);
+			return -1;
+		}
+#endif
+		/* last pass is complete, so release everything */
+		mach64_do_release_used_buffers(dev_priv);
+		DRM_DEBUG("idle engine, freed all buffers.\n");
+		if (list_empty(&dev_priv->free_list)) {
+			DRM_ERROR("Freelist empty with idle engine\n");
+			return -1;
+		}
+		return 0;
+	}
+	/* Look for a completed buffer and bail out of the loop as soon as we
+	 * find one; freeing extra bufs is left to do_release_used_buffers.  An
+	 * entry is done once its ring_ofs is outside the (wrapping) head..tail window.
+	 */
+	list_for_each_safe(ptr, tmp, &dev_priv->pending) {
+		entry = list_entry(ptr, drm_mach64_freelist_t, list);
+		ofs = entry->ring_ofs;
+		if (entry->discard &&
+		    ((head < tail && (ofs < head || ofs >= tail)) ||
+		     (head > tail && (ofs < head && ofs >= tail)))) {
+#if MACH64_EXTRA_CHECKING
+			int i;
+
+			for (i = head; i != tail; i = (i + 4) & ring->tail_mask)	/* step one 4-dword descriptor */
+			{
+				u32 o1 = le32_to_cpu(((u32 *) ring->
+						 start)[i + 1]);	/* descriptor dword 1 */
+				u32 o2 = GETBUFADDR(entry->buf);
+
+				if (o1 == o2) {
+					DRM_ERROR
+					    ("Attempting to free used buffer: "
+					     "i=%d  buf=0x%08x\n",
+					     i, o1);
+					mach64_dump_ring_info(dev_priv);
+					return -1;
+				}
+			}
+#endif
+			/* found a processed buffer */
+			entry->buf->pending = 0;
+			list_del(ptr);
+			list_add_tail(ptr, &dev_priv->free_list);
+			DRM_DEBUG
+			    ("freed processed buffer (head=%d tail=%d "
+			     "buf ring ofs=%d).\n",
+			     head, tail, ofs);
+			return 0;
+		}
+	}
+
+	return 1;	/* nothing reclaimable yet; the caller polls again */
+}
+
+struct drm_buf *mach64_freelist_get(drm_mach64_private_t *dev_priv)
+{
+	drm_mach64_descriptor_ring_t *ring = &dev_priv->ring;
+	drm_mach64_freelist_t *node;
+	struct list_head *pos;
+	int status = 1;		/* > 0: nothing reclaimed (yet) */
+	int t;
+
+	if (list_empty(&dev_priv->free_list)) {
+		if (list_empty(&dev_priv->pending)) {
+			DRM_ERROR
+			    ("Couldn't get buffer - pending and free lists empty\n");
+			t = 0;
+			list_for_each(pos, &dev_priv->placeholders)
+				t++;
+			DRM_INFO("Placeholders: %d\n", t);
+			return NULL;
+		}
+
+		/* Poll, 1us apart, until a pending buffer can be reclaimed. */
+		for (t = 0; t < dev_priv->usec_timeout; t++) {
+			status = mach64_do_reclaim_completed(dev_priv);
+			if (status <= 0)
+				break;
+			DRM_UDELAY(1);
+		}
+		if (status < 0)
+			return NULL;
+		if (status > 0) {
+			mach64_dump_ring_info(dev_priv);
+			DRM_ERROR
+			    ("timeout waiting for buffers: ring head_addr: 0x%08x head: %d tail: %d\n",
+			     ring->head_addr, ring->head, ring->tail);
+			return NULL;
+		}
+	}
+
+	/* Hand out the first free entry and park it on the placeholder list. */
+	pos = dev_priv->free_list.next;
+	list_del(pos);
+	node = list_entry(pos, drm_mach64_freelist_t, list);
+	node->buf->used = 0;
+	list_add_tail(pos, &dev_priv->placeholders);
+	return node->buf;
+}
+
+int mach64_freelist_put(drm_mach64_private_t *dev_priv, struct drm_buf *copy_buf)
+{
+	struct list_head *ptr;
+	drm_mach64_freelist_t *entry;
+
+#if MACH64_EXTRA_CHECKING
+	list_for_each(ptr, &dev_priv->pending) {
+		entry = list_entry(ptr, drm_mach64_freelist_t, list);
+		if (copy_buf == entry->buf) {
+			DRM_ERROR("Trying to release a pending buf\n");
+			return -EFAULT;
+		}
+	}
+#endif
+	if (list_empty(&dev_priv->placeholders))
+		return -EFAULT;	/* else list_entry() would alias the list head */
+	ptr = dev_priv->placeholders.next;	/* reuse the first placeholder node */
+	entry = list_entry(ptr, drm_mach64_freelist_t, list);
+	copy_buf->pending = 0;
+	copy_buf->used = 0;
+	entry->buf = copy_buf;	/* the node now tracks the returned buffer */
+	entry->discard = 1;
+	list_move_tail(ptr, &dev_priv->free_list);	/* unlink + append to free list */
+	return 0;
+}
+
+/*@}*/
+
+
+/*******************************************************************/
+/** \name DMA buffer request and submission IOCTL handler */
+/*@{*/
+
+static int mach64_dma_get_buffers(struct drm_device *dev,
+				  struct drm_file *file_priv,
+				  struct drm_dma * d)
+{
+	drm_mach64_private_t *dev_priv = dev->dev_private;
+	int slot = d->granted_count;
+
+	/* Grant buffers one at a time until the request is satisfied. */
+	while (slot < d->request_count) {
+		struct drm_buf *buf = mach64_freelist_get(dev_priv);
+
+		if (!buf) {
+#if MACH64_EXTRA_CHECKING
+			return -EFAULT;
+#else
+			return -EAGAIN;
+#endif
+		}
+		/* Record the owner, then report index and size to userspace. */
+		buf->file_priv = file_priv;
+		if (DRM_COPY_TO_USER(&d->request_indices[slot], &buf->idx,
+				     sizeof(buf->idx)) ||
+		    DRM_COPY_TO_USER(&d->request_sizes[slot], &buf->total,
+				     sizeof(buf->total)))
+			return -EFAULT;
+
+		d->granted_count++;
+		slot++;
+	}
+	return 0;
+}
+
+int mach64_dma_buffers(struct drm_device *dev, void *data,
+		       struct drm_file *file_priv)
+{
+	struct drm_device_dma *dma = dev->dma;
+	struct drm_dma *d = data;
+	int ret = 0;
+
+	LOCK_TEST_WITH_RETURN(dev, file_priv);
+
+	/* DMA buffer request ioctl.  Please don't send us buffers: only
+	 * requests are served here, so any send_count is rejected. */
+	if (d->send_count != 0) {
+		DRM_ERROR("Process %d trying to send %d buffers via drmDMA\n",
+			  DRM_CURRENTPID, d->send_count);
+		return -EINVAL;
+	}
+
+	/* We'll send you buffers.  An out-of-range count must fail right
+	 * here: a stored error code would be overwritten further down. */
+	if (d->request_count < 0 || d->request_count > dma->buf_count) {
+		DRM_ERROR("Process %d trying to get %d buffers (of %d max)\n",
+			  DRM_CURRENTPID, d->request_count, dma->buf_count);
+		return -EINVAL;
+	}
+
+	d->granted_count = 0;
+
+	if (d->request_count) {
+		ret = mach64_dma_get_buffers(dev, file_priv, d);
+	}
+
+	return ret;	/* 0, or the first error hit while granting buffers */
+}
+
+void mach64_driver_lastclose(struct drm_device * dev)
+{
+	(void)mach64_do_cleanup_dma(dev);	/* status is always 0; nothing to report */
+}
+
+/*@}*/
--- /dev/null
+++ b/drivers/gpu/drm/mach64/mach64_drm.h
@@ -0,0 +1,256 @@
+/* mach64_drm.h -- Public header for the mach64 driver -*- linux-c -*-
+ * Created: Thu Nov 30 20:04:32 2000 by gareth@valinux.com
+ */
+/*
+ * Copyright 2000 Gareth Hughes
+ * Copyright 2002 Frank C. Earl
+ * Copyright 2002-2003 Leif Delgass
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT OWNER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Gareth Hughes <gareth@valinux.com>
+ *    Frank C. Earl <fearl@airmail.net>
+ *    Leif Delgass <ldelgass@retinalburn.net>
+ */
+
+#ifndef __MACH64_DRM_H__
+#define __MACH64_DRM_H__
+
+/* WARNING: If you change any of these defines, make sure to change the
+ * defines in the Xserver file (mach64_sarea.h)
+ */
+#ifndef __MACH64_SAREA_DEFINES__
+#define __MACH64_SAREA_DEFINES__
+
+/* What needs to be changed for the current vertex buffer?
+ * GH: We're going to be pedantic about this.  We want the card to do as
+ * little as possible, so let's avoid having it fetch a whole bunch of
+ * register values that don't change all that often, if at all.
+ */
+#define MACH64_UPLOAD_DST_OFF_PITCH	0x0001
+#define MACH64_UPLOAD_Z_OFF_PITCH	0x0002
+#define MACH64_UPLOAD_Z_ALPHA_CNTL	0x0004
+#define MACH64_UPLOAD_SCALE_3D_CNTL	0x0008
+#define MACH64_UPLOAD_DP_FOG_CLR	0x0010
+#define MACH64_UPLOAD_DP_WRITE_MASK	0x0020
+#define MACH64_UPLOAD_DP_PIX_WIDTH	0x0040
+#define MACH64_UPLOAD_SETUP_CNTL	0x0080
+#define MACH64_UPLOAD_MISC		0x0100
+#define MACH64_UPLOAD_TEXTURE		0x0200
+#define MACH64_UPLOAD_TEX0IMAGE		0x0400
+#define MACH64_UPLOAD_TEX1IMAGE		0x0800
+#define MACH64_UPLOAD_CLIPRECTS		0x1000	/* handled client-side */
+#define MACH64_UPLOAD_CONTEXT		0x00ff
+#define MACH64_UPLOAD_ALL		0x1fff
+
+/* DMA buffer size
+ */
+#define MACH64_BUFFER_SIZE		16384
+
+/* Max number of swaps allowed on the ring
+ * before the client must wait
+ */
+#define MACH64_MAX_QUEUED_FRAMES        3U
+
+/* Byte offsets for host blit buffer data
+ */
+#define MACH64_HOSTDATA_BLIT_OFFSET	104
+
+/* Keep these small for testing.
+ */
+#define MACH64_NR_SAREA_CLIPRECTS	8
+
+#define MACH64_CARD_HEAP		0
+#define MACH64_AGP_HEAP			1
+#define MACH64_NR_TEX_HEAPS		2
+#define MACH64_NR_TEX_REGIONS		64
+#define MACH64_LOG_TEX_GRANULARITY	16
+
+#define MACH64_TEX_MAXLEVELS		1
+
+#define MACH64_NR_CONTEXT_REGS		15
+#define MACH64_NR_TEXTURE_REGS		4
+
+#endif				/* __MACH64_SAREA_DEFINES__ */
+
+typedef struct {	/* NOTE(review): field groups parallel the MACH64_UPLOAD_* flags above; presumably register images -- confirm */
+	unsigned int dst_off_pitch;
+
+	unsigned int z_off_pitch;
+	unsigned int z_cntl;
+	unsigned int alpha_tst_cntl;
+
+	unsigned int scale_3d_cntl;
+
+	unsigned int sc_left_right;
+	unsigned int sc_top_bottom;
+
+	unsigned int dp_fog_clr;
+	unsigned int dp_write_mask;
+	unsigned int dp_pix_width;
+	unsigned int dp_mix;
+	unsigned int dp_src;
+
+	unsigned int clr_cmp_cntl;
+	unsigned int gui_traj_cntl;
+
+	unsigned int setup_cntl;
+
+	unsigned int tex_size_pitch;
+	unsigned int tex_cntl;
+	unsigned int secondary_tex_off;
+	unsigned int tex_offset;
+} drm_mach64_context_regs_t;	/* embedded in drm_mach64_sarea as context_state */
+
+typedef struct drm_mach64_sarea {
+	/* The channel for communication of state information to the kernel
+	 * on firing a vertex dma buffer.
+	 */
+	drm_mach64_context_regs_t context_state;
+	unsigned int dirty;	/* presumably a mask of MACH64_UPLOAD_* bits -- TODO confirm */
+	unsigned int vertsize;
+
+	/* The current cliprects, or a subset thereof.
+	 */
+	struct drm_clip_rect boxes[MACH64_NR_SAREA_CLIPRECTS];
+	unsigned int nbox;	/* presumably the number of valid entries in boxes[] -- TODO confirm */
+
+	/* Counters for client-side throttling of rendering clients.
+	 */
+	unsigned int frames_queued;	/* reset to 0 by the DMA init ioctl */
+
+	/* Texture memory LRU.
+	 */
+	struct drm_tex_region tex_list[MACH64_NR_TEX_HEAPS][MACH64_NR_TEX_REGIONS +
+						       1];
+	unsigned int tex_age[MACH64_NR_TEX_HEAPS];
+	int ctx_owner;
+} drm_mach64_sarea_t;
+
+/* WARNING: If you change any of these defines, make sure to change the
+ * defines in the Xserver file (mach64_common.h)
+ */
+
+/* Mach64 specific ioctls
+ * The device specific ioctl range is 0x40 to 0x79.
+ */
+
+#define DRM_MACH64_INIT           0x00
+#define DRM_MACH64_IDLE           0x01
+#define DRM_MACH64_RESET          0x02
+#define DRM_MACH64_SWAP           0x03
+#define DRM_MACH64_CLEAR          0x04
+#define DRM_MACH64_VERTEX         0x05
+#define DRM_MACH64_BLIT           0x06
+#define DRM_MACH64_FLUSH          0x07
+#define DRM_MACH64_GETPARAM       0x08
+
+#define DRM_IOCTL_MACH64_INIT           DRM_IOW( DRM_COMMAND_BASE + DRM_MACH64_INIT, drm_mach64_init_t)
+#define DRM_IOCTL_MACH64_IDLE           DRM_IO(  DRM_COMMAND_BASE + DRM_MACH64_IDLE )
+#define DRM_IOCTL_MACH64_RESET          DRM_IO(  DRM_COMMAND_BASE + DRM_MACH64_RESET )
+#define DRM_IOCTL_MACH64_SWAP           DRM_IO(  DRM_COMMAND_BASE + DRM_MACH64_SWAP )
+#define DRM_IOCTL_MACH64_CLEAR          DRM_IOW( DRM_COMMAND_BASE + DRM_MACH64_CLEAR, drm_mach64_clear_t)
+#define DRM_IOCTL_MACH64_VERTEX         DRM_IOW( DRM_COMMAND_BASE + DRM_MACH64_VERTEX, drm_mach64_vertex_t)
+#define DRM_IOCTL_MACH64_BLIT           DRM_IOW( DRM_COMMAND_BASE + DRM_MACH64_BLIT, drm_mach64_blit_t)
+#define DRM_IOCTL_MACH64_FLUSH          DRM_IO(  DRM_COMMAND_BASE + DRM_MACH64_FLUSH )
+#define DRM_IOCTL_MACH64_GETPARAM       DRM_IOWR( DRM_COMMAND_BASE + DRM_MACH64_GETPARAM, drm_mach64_getparam_t)
+
+/* Buffer flags for clears
+ */
+#define MACH64_FRONT			0x1
+#define MACH64_BACK			0x2
+#define MACH64_DEPTH			0x4
+
+/* Primitive types for vertex buffers
+ */
+#define MACH64_PRIM_POINTS		0x00000000
+#define MACH64_PRIM_LINES		0x00000001
+#define MACH64_PRIM_LINE_LOOP		0x00000002
+#define MACH64_PRIM_LINE_STRIP		0x00000003
+#define MACH64_PRIM_TRIANGLES		0x00000004
+#define MACH64_PRIM_TRIANGLE_STRIP	0x00000005
+#define MACH64_PRIM_TRIANGLE_FAN	0x00000006
+#define MACH64_PRIM_QUADS		0x00000007
+#define MACH64_PRIM_QUAD_STRIP		0x00000008
+#define MACH64_PRIM_POLYGON		0x00000009
+
+typedef enum _drm_mach64_dma_mode_t {
+	MACH64_MODE_DMA_ASYNC,	/* asynchronous DMA; unknown values are logged as this mode too */
+	MACH64_MODE_DMA_SYNC,	/* synchronous DMA */
+	MACH64_MODE_MMIO	/* pseudo-DMA over MMIO; also the fallback when the DMA test fails */
+} drm_mach64_dma_mode_t;
+
+typedef struct drm_mach64_init {
+	enum {
+		DRM_MACH64_INIT_DMA = 0x01,	/* set up DMA state */
+		DRM_MACH64_CLEANUP_DMA = 0x02	/* tear DMA state down */
+	} func;
+
+	unsigned long sarea_priv_offset;	/* byte offset of drm_mach64_sarea_t within the SAREA */
+	int is_pci;	/* nonzero: skip the AGP ring/buffer ioremap and texture map lookup */
+	drm_mach64_dma_mode_t dma_mode;	/* requested mode; may be downgraded to MMIO at init */
+
+	unsigned int fb_bpp;
+	unsigned int front_offset, front_pitch;	/* combined into a packed offset/pitch value at init */
+	unsigned int back_offset, back_pitch;
+
+	unsigned int depth_bpp;
+	unsigned int depth_offset, depth_pitch;
+
+	unsigned long fb_offset;	/* the *_offset fields below are map handles for drm_core_findmap() */
+	unsigned long mmio_offset;
+	unsigned long ring_offset;	/* descriptor ring map */
+	unsigned long buffers_offset;	/* DMA buffer map; only looked up when !is_pci */
+	unsigned long agp_textures_offset;	/* only looked up when !is_pci */
+} drm_mach64_init_t;
+
+typedef struct drm_mach64_clear {	/* argument of DRM_IOCTL_MACH64_CLEAR */
+	unsigned int flags;	/* presumably MACH64_FRONT / MACH64_BACK / MACH64_DEPTH bits -- TODO confirm */
+	int x, y, w, h;
+	unsigned int clear_color;
+	unsigned int clear_depth;
+} drm_mach64_clear_t;
+
+typedef struct drm_mach64_vertex {	/* argument of DRM_IOCTL_MACH64_VERTEX */
+	int prim;		/* presumably one of MACH64_PRIM_* -- TODO confirm */
+	void *buf;		/* Address of vertex buffer */
+	unsigned long used;	/* Number of bytes in buffer */
+	int discard;		/* Client finished with buffer? */
+} drm_mach64_vertex_t;
+
+typedef struct drm_mach64_blit {	/* argument of DRM_IOCTL_MACH64_BLIT */
+	void *buf;	/* NOTE(review): presumably the source data address -- confirm in the blit handler */
+	int pitch;
+	int offset;
+	int format;
+	unsigned short x, y;
+	unsigned short width, height;
+} drm_mach64_blit_t;
+
+typedef struct drm_mach64_getparam {	/* argument of DRM_IOCTL_MACH64_GETPARAM (read/write ioctl) */
+	enum {
+		MACH64_PARAM_FRAMES_QUEUED = 0x01,
+		MACH64_PARAM_IRQ_NR = 0x02
+	} param;	/* which value is being queried */
+	void *value;	/* presumably where the result is stored -- TODO confirm in the handler */
+} drm_mach64_getparam_t;
+
+#endif
--- /dev/null
+++ b/drivers/gpu/drm/mach64/mach64_drv.c
@@ -0,0 +1,105 @@
+/* mach64_drv.c -- mach64 (Rage Pro) driver -*- linux-c -*-
+ * Created: Fri Nov 24 18:34:32 2000 by gareth@valinux.com
+ *
+ * Copyright 2000 Gareth Hughes
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * GARETH HUGHES BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Gareth Hughes <gareth@valinux.com>
+ *    Leif Delgass <ldelgass@retinalburn.net>
+ */
+
+#include "drmP.h"
+#include "drm.h"
+#include "mach64_drm.h"
+#include "mach64_drv.h"
+
+#include "drm_pciids.h"
+
+static struct pci_device_id pciidlist[] = {	/* passed to drm_init() and used as the PCI id_table */
+	mach64_PCI_IDS
+};
+
+static int probe(struct pci_dev *pdev, const struct pci_device_id *ent);
+static struct drm_driver driver = {	/* DRM core registration record for this module */
+	.driver_features =
+	    DRIVER_USE_AGP | DRIVER_USE_MTRR | DRIVER_PCI_DMA | DRIVER_HAVE_DMA
+	    | DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED,
+	.lastclose = mach64_driver_lastclose,	/* runs mach64_do_cleanup_dma() */
+	.get_vblank_counter = mach64_get_vblank_counter,
+	.enable_vblank = mach64_enable_vblank,
+	.disable_vblank = mach64_disable_vblank,
+	.irq_preinstall = mach64_driver_irq_preinstall,
+	.irq_postinstall = mach64_driver_irq_postinstall,
+	.irq_uninstall = mach64_driver_irq_uninstall,
+	.irq_handler = mach64_driver_irq_handler,
+	.reclaim_buffers = drm_core_reclaim_buffers,
+	.get_map_ofs = drm_core_get_map_ofs,
+	.get_reg_ofs = drm_core_get_reg_ofs,
+	.ioctls = mach64_ioctls,	/* num_ioctls is filled in by mach64_init() */
+	.dma_ioctl = mach64_dma_buffers,	/* DMA buffer request handler */
+	.fops = {
+		.owner = THIS_MODULE,
+		.open = drm_open,
+		.release = drm_release,
+		.ioctl = drm_ioctl,
+		.mmap = drm_mmap,
+		.poll = drm_poll,
+		.fasync = drm_fasync,
+		},
+	.pci_driver = {
+		.name = DRIVER_NAME,
+		.id_table = pciidlist,
+		.probe = probe,
+		.remove = __devexit_p(drm_cleanup_pci),
+	},
+
+	.name = DRIVER_NAME,
+	.desc = DRIVER_DESC,
+	.date = DRIVER_DATE,
+	.major = DRIVER_MAJOR,
+	.minor = DRIVER_MINOR,
+	.patchlevel = DRIVER_PATCHLEVEL,
+};
+
+static int probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+	return drm_get_dev(pdev, ent, &driver);	/* PCI probe hook: hands the device to the DRM core */
+}
+
+
+static int __init mach64_init(void)
+{
+	driver.num_ioctls = mach64_max_ioctl;	/* ioctl table size is only known at run time */
+	return drm_init(&driver, pciidlist);	/* module load: register with the DRM core */
+}
+
+static void __exit mach64_exit(void)
+{
+	drm_exit(&driver);	/* module unload: undo drm_init() */
+}
+
+module_init(mach64_init);
+module_exit(mach64_exit);
+
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE("GPL and additional rights");
--- /dev/null
+++ b/drivers/gpu/drm/mach64/mach64_drv.h
@@ -0,0 +1,859 @@
+/* mach64_drv.h -- Private header for mach64 driver -*- linux-c -*-
+ * Created: Fri Nov 24 22:07:58 2000 by gareth@valinux.com
+ */
+/*
+ * Copyright 2000 Gareth Hughes
+ * Copyright 2002 Frank C. Earl
+ * Copyright 2002-2003 Leif Delgass
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT OWNER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Gareth Hughes <gareth@valinux.com>
+ *    Frank C. Earl <fearl@airmail.net>
+ *    Leif Delgass <ldelgass@retinalburn.net>
+ *    José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#ifndef __MACH64_DRV_H__
+#define __MACH64_DRV_H__
+
+/* General customization:
+ */
+
+#define DRIVER_AUTHOR		"Gareth Hughes, Leif Delgass, José Fonseca"
+
+#define DRIVER_NAME		"mach64"
+#define DRIVER_DESC		"DRM module for the ATI Rage Pro"
+#define DRIVER_DATE		"20060718"
+
+#define DRIVER_MAJOR		2
+#define DRIVER_MINOR		0
+#define DRIVER_PATCHLEVEL	0
+
+/* FIXME: remove these when not needed */
+/* Development driver options */
+#define MACH64_EXTRA_CHECKING     0	/* Extra sanity checks for DMA/freelist management */
+#define MACH64_VERBOSE		  0	/* Verbose debugging output */
+
+typedef struct drm_mach64_freelist {	/* list node that tracks one DMA buffer */
+	struct list_head list;	/* List pointers for free_list, placeholders, or pending list */
+	struct drm_buf *buf;		/* Pointer to the buffer */
+	int discard;		/* This flag is set when we're done (re)using a buffer */
+	u32 ring_ofs;		/* dword offset in ring of last descriptor for this buffer */
+} drm_mach64_freelist_t;
+
+typedef struct drm_mach64_descriptor_ring {	/* descriptor ring state: head/tail are dword offsets, size/space are bytes */
+	void *start;		/* write pointer (cpu address) to start of descriptor ring */
+	u32 start_addr;		/* bus address of beginning of descriptor ring */
+	int size;		/* size of ring in bytes */
+
+	u32 head_addr;		/* bus address of descriptor ring head */
+	u32 head;		/* dword offset of descriptor ring head */
+	u32 tail;		/* dword offset of descriptor ring tail */
+	u32 tail_mask;		/* mask used to wrap ring */
+	int space;		/* number of free bytes in ring */
+} drm_mach64_descriptor_ring_t;
+
+typedef struct drm_mach64_private {	/* per-device driver state (reached through dev->dev_private) */
+	drm_mach64_sarea_t *sarea_priv;
+
+	int is_pci;	/* nonzero: GETBUFPTR() uses buf->address instead of the dev_buffers mapping */
+	drm_mach64_dma_mode_t driver_mode;	/* Async DMA, sync DMA, or MMIO */
+
+	int usec_timeout;	/* Timeout for the wait functions */
+
+	drm_mach64_descriptor_ring_t ring;	/* DMA descriptor table (ring buffer) */
+	int ring_running;	/* Set to 1 by mach64_ring_start(), back to 0 by mach64_ring_stop() */
+
+	struct list_head free_list;	/* Free-list head */
+	struct list_head placeholders;	/* Placeholder list for buffers held by clients */
+	struct list_head pending;	/* Buffers pending completion */
+
+	u32 frame_ofs[MACH64_MAX_QUEUED_FRAMES];	/* dword ring offsets of most recent frame swaps */
+
+	unsigned int fb_bpp;
+	unsigned int front_offset, front_pitch;
+	unsigned int back_offset, back_pitch;
+
+	unsigned int depth_bpp;
+	unsigned int depth_offset, depth_pitch;
+
+	atomic_t vbl_received;          /**< Number of vblanks received. */
+
+	u32 front_offset_pitch;
+	u32 back_offset_pitch;
+	u32 depth_offset_pitch;
+
+	drm_local_map_t *sarea;
+	drm_local_map_t *fb;
+	drm_local_map_t *mmio;	/* register aperture used by MACH64_READ()/MACH64_WRITE() */
+	drm_local_map_t *ring_map;
+	drm_local_map_t *dev_buffers;	/* this is a pointer to a structure in dev */
+	drm_local_map_t *agp_textures;
+} drm_mach64_private_t;
+
+extern struct drm_ioctl_desc mach64_ioctls[];
+extern int mach64_max_ioctl;
+
+				/* mach64_dma.c */
+extern int mach64_dma_init(struct drm_device *dev, void *data,
+			   struct drm_file *file_priv);
+extern int mach64_dma_idle(struct drm_device *dev, void *data,
+			   struct drm_file *file_priv);
+extern int mach64_dma_flush(struct drm_device *dev, void *data,
+			    struct drm_file *file_priv);
+extern int mach64_engine_reset(struct drm_device *dev, void *data,
+			       struct drm_file *file_priv);
+extern int mach64_dma_buffers(struct drm_device *dev, void *data,
+			      struct drm_file *file_priv);
+extern void mach64_driver_lastclose(struct drm_device * dev);
+
+extern int mach64_init_freelist(struct drm_device * dev);
+extern void mach64_destroy_freelist(struct drm_device * dev);
+extern struct drm_buf *mach64_freelist_get(drm_mach64_private_t * dev_priv);
+extern int mach64_freelist_put(drm_mach64_private_t * dev_priv,
+			       struct drm_buf * copy_buf);
+
+extern int mach64_do_wait_for_fifo(drm_mach64_private_t * dev_priv,
+				   int entries);
+extern int mach64_do_wait_for_idle(drm_mach64_private_t * dev_priv);
+extern int mach64_wait_ring(drm_mach64_private_t * dev_priv, int n);
+extern int mach64_do_dispatch_pseudo_dma(drm_mach64_private_t * dev_priv);
+extern int mach64_do_release_used_buffers(drm_mach64_private_t * dev_priv);
+extern void mach64_dump_engine_info(drm_mach64_private_t * dev_priv);
+extern void mach64_dump_ring_info(drm_mach64_private_t * dev_priv);
+extern int mach64_do_engine_reset(drm_mach64_private_t * dev_priv);
+
+extern int mach64_add_buf_to_ring(drm_mach64_private_t *dev_priv,
+                                  drm_mach64_freelist_t *_entry);
+extern int mach64_add_hostdata_buf_to_ring(drm_mach64_private_t *dev_priv,
+                                           drm_mach64_freelist_t *_entry);
+
+extern int mach64_do_dma_idle(drm_mach64_private_t * dev_priv);
+extern int mach64_do_dma_flush(drm_mach64_private_t * dev_priv);
+extern int mach64_do_cleanup_dma(struct drm_device * dev);
+
+				/* mach64_state.c */
+extern int mach64_dma_clear(struct drm_device *dev, void *data,
+			    struct drm_file *file_priv);
+extern int mach64_dma_swap(struct drm_device *dev, void *data,
+			   struct drm_file *file_priv);
+extern int mach64_dma_vertex(struct drm_device *dev, void *data,
+			     struct drm_file *file_priv);
+extern int mach64_dma_blit(struct drm_device *dev, void *data,
+			   struct drm_file *file_priv);
+extern int mach64_get_param(struct drm_device *dev, void *data,
+			    struct drm_file *file_priv);
+
+extern u32 mach64_get_vblank_counter(struct drm_device *dev, int crtc);
+extern int mach64_enable_vblank(struct drm_device *dev, int crtc);
+extern void mach64_disable_vblank(struct drm_device *dev, int crtc);
+extern irqreturn_t mach64_driver_irq_handler(DRM_IRQ_ARGS);
+extern void mach64_driver_irq_preinstall(struct drm_device *dev);
+extern int mach64_driver_irq_postinstall(struct drm_device *dev);
+extern void mach64_driver_irq_uninstall(struct drm_device *dev);
+
+/* ================================================================
+ * Registers
+ */
+
+#define MACH64_AGP_BASE				0x0148
+#define MACH64_AGP_CNTL				0x014c
+#define MACH64_ALPHA_TST_CNTL			0x0550
+
+#define MACH64_DSP_CONFIG			0x0420
+#define MACH64_DSP_ON_OFF			0x0424
+#define MACH64_EXT_MEM_CNTL			0x04ac
+#define MACH64_GEN_TEST_CNTL			0x04d0
+#define MACH64_HW_DEBUG				0x047c
+#define MACH64_MEM_ADDR_CONFIG			0x0434
+#define MACH64_MEM_BUF_CNTL			0x042c
+#define MACH64_MEM_CNTL				0x04b0
+
+#define MACH64_BM_ADDR				0x0648
+#define MACH64_BM_COMMAND			0x0188
+#define MACH64_BM_DATA				0x0648
+#define MACH64_BM_FRAME_BUF_OFFSET		0x0180
+#define MACH64_BM_GUI_TABLE			0x01b8
+#define MACH64_BM_GUI_TABLE_CMD			0x064c
+#	define MACH64_CIRCULAR_BUF_SIZE_16KB		(0 << 0)
+#	define MACH64_CIRCULAR_BUF_SIZE_32KB		(1 << 0)
+#	define MACH64_CIRCULAR_BUF_SIZE_64KB		(2 << 0)
+#	define MACH64_CIRCULAR_BUF_SIZE_128KB		(3 << 0)
+#	define MACH64_LAST_DESCRIPTOR			(1U << 31)	/* unsigned: shifting a signed 1 into bit 31 is undefined */
+#define MACH64_BM_HOSTDATA			0x0644
+#define MACH64_BM_STATUS			0x018c
+#define MACH64_BM_SYSTEM_MEM_ADDR		0x0184
+#define MACH64_BM_SYSTEM_TABLE			0x01bc
+#define MACH64_BUS_CNTL				0x04a0
+#	define MACH64_BUS_MSTR_RESET			(1 << 1)
+#	define MACH64_BUS_APER_REG_DIS			(1 << 4)
+#	define MACH64_BUS_FLUSH_BUF			(1 << 2)
+#	define MACH64_BUS_MASTER_DIS			(1 << 6)
+#	define MACH64_BUS_EXT_REG_EN			(1 << 27)
+
+#define MACH64_CLR_CMP_CLR			0x0700
+#define MACH64_CLR_CMP_CNTL			0x0708
+#define MACH64_CLR_CMP_MASK			0x0704
+#define MACH64_CONFIG_CHIP_ID			0x04e0
+#define MACH64_CONFIG_CNTL			0x04dc
+#define MACH64_CONFIG_STAT0			0x04e4
+#define MACH64_CONFIG_STAT1			0x0494
+#define MACH64_CONFIG_STAT2			0x0498
+#define MACH64_CONTEXT_LOAD_CNTL		0x072c
+#define MACH64_CONTEXT_MASK			0x0720
+#define MACH64_COMPOSITE_SHADOW_ID		0x0798
+#define MACH64_CRC_SIG				0x04e8
+#define MACH64_CUSTOM_MACRO_CNTL		0x04d4
+
+#define MACH64_DP_BKGD_CLR			0x06c0
+#define MACH64_DP_FOG_CLR			0x06c4
+#define MACH64_DP_FGRD_BKGD_CLR			0x06e0
+#define MACH64_DP_FRGD_CLR			0x06c4
+#define MACH64_DP_FGRD_CLR_MIX			0x06dc
+
+#define MACH64_DP_MIX				0x06d4
+#	define BKGD_MIX_NOT_D				(0 << 0)
+#	define BKGD_MIX_ZERO				(1 << 0)
+#	define BKGD_MIX_ONE				(2 << 0)
+#	define MACH64_BKGD_MIX_D			(3 << 0)
+#	define BKGD_MIX_NOT_S				(4 << 0)
+#	define BKGD_MIX_D_XOR_S				(5 << 0)
+#	define BKGD_MIX_NOT_D_XOR_S			(6 << 0)
+#	define MACH64_BKGD_MIX_S			(7 << 0)
+#	define BKGD_MIX_NOT_D_OR_NOT_S			(8 << 0)
+#	define BKGD_MIX_D_OR_NOT_S			(9 << 0)
+#	define BKGD_MIX_NOT_D_OR_S			(10 << 0)
+#	define BKGD_MIX_D_OR_S				(11 << 0)
+#	define BKGD_MIX_D_AND_S				(12 << 0)
+#	define BKGD_MIX_NOT_D_AND_S			(13 << 0)
+#	define BKGD_MIX_D_AND_NOT_S			(14 << 0)
+#	define BKGD_MIX_NOT_D_AND_NOT_S			(15 << 0)
+#	define BKGD_MIX_D_PLUS_S_DIV2			(23 << 0)
+#	define FRGD_MIX_NOT_D				(0 << 16)
+#	define FRGD_MIX_ZERO				(1 << 16)
+#	define FRGD_MIX_ONE				(2 << 16)
+#	define FRGD_MIX_D				(3 << 16)
+#	define FRGD_MIX_NOT_S				(4 << 16)
+#	define FRGD_MIX_D_XOR_S				(5 << 16)
+#	define FRGD_MIX_NOT_D_XOR_S			(6 << 16)
+#	define MACH64_FRGD_MIX_S			(7 << 16)
+#	define FRGD_MIX_NOT_D_OR_NOT_S			(8 << 16)
+#	define FRGD_MIX_D_OR_NOT_S			(9 << 16)
+#	define FRGD_MIX_NOT_D_OR_S			(10 << 16)
+#	define FRGD_MIX_D_OR_S				(11 << 16)
+#	define FRGD_MIX_D_AND_S				(12 << 16)
+#	define FRGD_MIX_NOT_D_AND_S			(13 << 16)
+#	define FRGD_MIX_D_AND_NOT_S			(14 << 16)
+#	define FRGD_MIX_NOT_D_AND_NOT_S			(15 << 16)
+#	define FRGD_MIX_D_PLUS_S_DIV2			(23 << 16)
+
+#define MACH64_DP_PIX_WIDTH			0x06d0
+#	define MACH64_HOST_TRIPLE_ENABLE		(1 << 13)
+#	define MACH64_BYTE_ORDER_MSB_TO_LSB		(0 << 24)
+#	define MACH64_BYTE_ORDER_LSB_TO_MSB		(1 << 24)
+
+#define MACH64_DP_SRC				0x06d8
+#	define MACH64_BKGD_SRC_BKGD_CLR			(0 << 0)
+#	define MACH64_BKGD_SRC_FRGD_CLR			(1 << 0)
+#	define MACH64_BKGD_SRC_HOST			(2 << 0)
+#	define MACH64_BKGD_SRC_BLIT			(3 << 0)
+#	define MACH64_BKGD_SRC_PATTERN			(4 << 0)
+#	define MACH64_BKGD_SRC_3D			(5 << 0)
+#	define MACH64_FRGD_SRC_BKGD_CLR			(0 << 8)
+#	define MACH64_FRGD_SRC_FRGD_CLR			(1 << 8)
+#	define MACH64_FRGD_SRC_HOST			(2 << 8)
+#	define MACH64_FRGD_SRC_BLIT			(3 << 8)
+#	define MACH64_FRGD_SRC_PATTERN			(4 << 8)
+#	define MACH64_FRGD_SRC_3D			(5 << 8)
+#	define MACH64_MONO_SRC_ONE			(0 << 16)
+#	define MACH64_MONO_SRC_PATTERN			(1 << 16)
+#	define MACH64_MONO_SRC_HOST			(2 << 16)
+#	define MACH64_MONO_SRC_BLIT			(3 << 16)
+
+#define MACH64_DP_WRITE_MASK			0x06c8
+
+#define MACH64_DST_CNTL				0x0530
+#	define MACH64_DST_X_RIGHT_TO_LEFT		(0 << 0)
+#	define MACH64_DST_X_LEFT_TO_RIGHT		(1 << 0)
+#	define MACH64_DST_Y_BOTTOM_TO_TOP		(0 << 1)
+#	define MACH64_DST_Y_TOP_TO_BOTTOM		(1 << 1)
+#	define MACH64_DST_X_MAJOR			(0 << 2)
+#	define MACH64_DST_Y_MAJOR			(1 << 2)
+#	define MACH64_DST_X_TILE			(1 << 3)
+#	define MACH64_DST_Y_TILE			(1 << 4)
+#	define MACH64_DST_LAST_PEL			(1 << 5)
+#	define MACH64_DST_POLYGON_ENABLE		(1 << 6)
+#	define MACH64_DST_24_ROTATION_ENABLE		(1 << 7)
+
+#define MACH64_DST_HEIGHT_WIDTH			0x0518
+#define MACH64_DST_OFF_PITCH			0x0500
+#define MACH64_DST_WIDTH_HEIGHT			0x06ec
+#define MACH64_DST_X_Y				0x06e8
+#define MACH64_DST_Y_X				0x050c
+
+#define MACH64_FIFO_STAT			0x0710
+#	define MACH64_FIFO_SLOT_MASK			0x0000ffff
+#	define MACH64_FIFO_ERR				(1U << 31)	/* unsigned: shifting a signed 1 into bit 31 is undefined */
+
+#define MACH64_GEN_TEST_CNTL			0x04d0
+#	define MACH64_GUI_ENGINE_ENABLE			(1 << 8)
+#define MACH64_GUI_CMDFIFO_DEBUG		0x0170
+#define MACH64_GUI_CMDFIFO_DATA			0x0174
+#define MACH64_GUI_CNTL				0x0178
+#       define MACH64_CMDFIFO_SIZE_MASK                 0x00000003ul
+#       define MACH64_CMDFIFO_SIZE_192                  0x00000000ul
+#       define MACH64_CMDFIFO_SIZE_128                  0x00000001ul
+#       define MACH64_CMDFIFO_SIZE_64                   0x00000002ul
+#define MACH64_GUI_STAT				0x0738
+#	define MACH64_GUI_ACTIVE			(1 << 0)
+#define MACH64_GUI_TRAJ_CNTL			0x0730
+
+#define MACH64_HOST_CNTL			0x0640
+#define MACH64_HOST_DATA0			0x0600
+
+#define MACH64_ONE_OVER_AREA			0x029c
+#define MACH64_ONE_OVER_AREA_UC			0x0300
+
+#define MACH64_PAT_REG0				0x0680
+#define MACH64_PAT_REG1				0x0684
+
+#define MACH64_SC_LEFT                          0x06a0
+#define MACH64_SC_RIGHT                         0x06a4
+#define MACH64_SC_LEFT_RIGHT                    0x06a8
+#define MACH64_SC_TOP                           0x06ac
+#define MACH64_SC_BOTTOM                        0x06b0
+#define MACH64_SC_TOP_BOTTOM                    0x06b4
+
+#define MACH64_SCALE_3D_CNTL			0x05fc
+#define MACH64_SCRATCH_REG0			0x0480
+#define MACH64_SCRATCH_REG1			0x0484
+#define MACH64_SECONDARY_TEX_OFF		0x0778
+#define MACH64_SETUP_CNTL			0x0304
+#define MACH64_SRC_CNTL				0x05b4
+#	define MACH64_SRC_BM_ENABLE			(1 << 8)
+#	define MACH64_SRC_BM_SYNC			(1 << 9)
+#	define MACH64_SRC_BM_OP_FRAME_TO_SYSTEM		(0 << 10)
+#	define MACH64_SRC_BM_OP_SYSTEM_TO_FRAME		(1 << 10)
+#	define MACH64_SRC_BM_OP_REG_TO_SYSTEM		(2 << 10)
+#	define MACH64_SRC_BM_OP_SYSTEM_TO_REG		(3 << 10)
+#define MACH64_SRC_HEIGHT1			0x0594
+#define MACH64_SRC_HEIGHT2			0x05ac
+#define MACH64_SRC_HEIGHT1_WIDTH1		0x0598
+#define MACH64_SRC_HEIGHT2_WIDTH2		0x05b0
+#define MACH64_SRC_OFF_PITCH			0x0580
+#define MACH64_SRC_WIDTH1			0x0590
+#define MACH64_SRC_Y_X				0x058c
+
+#define MACH64_TEX_0_OFF			0x05c0
+#define MACH64_TEX_CNTL				0x0774
+#define MACH64_TEX_SIZE_PITCH			0x0770
+#define MACH64_TIMER_CONFIG			0x0428
+
+#define MACH64_VERTEX_1_ARGB			0x0254
+#define MACH64_VERTEX_1_S			0x0240
+#define MACH64_VERTEX_1_SECONDARY_S		0x0328
+#define MACH64_VERTEX_1_SECONDARY_T		0x032c
+#define MACH64_VERTEX_1_SECONDARY_W		0x0330
+#define MACH64_VERTEX_1_SPEC_ARGB		0x024c
+#define MACH64_VERTEX_1_T			0x0244
+#define MACH64_VERTEX_1_W			0x0248
+#define MACH64_VERTEX_1_X_Y			0x0258
+#define MACH64_VERTEX_1_Z			0x0250
+#define MACH64_VERTEX_2_ARGB			0x0274
+#define MACH64_VERTEX_2_S			0x0260
+#define MACH64_VERTEX_2_SECONDARY_S		0x0334
+#define MACH64_VERTEX_2_SECONDARY_T		0x0338
+#define MACH64_VERTEX_2_SECONDARY_W		0x033c
+#define MACH64_VERTEX_2_SPEC_ARGB		0x026c
+#define MACH64_VERTEX_2_T			0x0264
+#define MACH64_VERTEX_2_W			0x0268
+#define MACH64_VERTEX_2_X_Y			0x0278
+#define MACH64_VERTEX_2_Z			0x0270
+#define MACH64_VERTEX_3_ARGB			0x0294
+#define MACH64_VERTEX_3_S			0x0280
+#define MACH64_VERTEX_3_SECONDARY_S		0x02a0
+#define MACH64_VERTEX_3_SECONDARY_T		0x02a4
+#define MACH64_VERTEX_3_SECONDARY_W		0x02a8
+#define MACH64_VERTEX_3_SPEC_ARGB		0x028c
+#define MACH64_VERTEX_3_T			0x0284
+#define MACH64_VERTEX_3_W			0x0288
+#define MACH64_VERTEX_3_X_Y			0x0298
+#define MACH64_VERTEX_3_Z			0x0290
+
+#define MACH64_Z_CNTL				0x054c
+#define MACH64_Z_OFF_PITCH			0x0548
+
+#define MACH64_CRTC_VLINE_CRNT_VLINE		0x0410
+#	define MACH64_CRTC_VLINE_MASK		        0x000007ff
+#	define MACH64_CRTC_CRNT_VLINE_MASK		0x07ff0000
+#define MACH64_CRTC_OFF_PITCH			0x0414
+#define MACH64_CRTC_INT_CNTL			0x0418
+#	define MACH64_CRTC_VBLANK			(1 << 0)
+#	define MACH64_CRTC_VBLANK_INT_EN		(1 << 1)
+#	define MACH64_CRTC_VBLANK_INT			(1 << 2)
+#	define MACH64_CRTC_VLINE_INT_EN			(1 << 3)
+#	define MACH64_CRTC_VLINE_INT			(1 << 4)
+#	define MACH64_CRTC_VLINE_SYNC			(1 << 5)	/* 0=even, 1=odd */
+#	define MACH64_CRTC_FRAME			(1 << 6)	/* 0=even, 1=odd */
+#	define MACH64_CRTC_SNAPSHOT_INT_EN		(1 << 7)
+#	define MACH64_CRTC_SNAPSHOT_INT			(1 << 8)
+#	define MACH64_CRTC_I2C_INT_EN			(1 << 9)
+#	define MACH64_CRTC_I2C_INT			(1 << 10)
+#	define MACH64_CRTC2_VBLANK			(1 << 11)	/* LT Pro */
+#	define MACH64_CRTC2_VBLANK_INT_EN		(1 << 12)	/* LT Pro */
+#	define MACH64_CRTC2_VBLANK_INT			(1 << 13)	/* LT Pro */
+#	define MACH64_CRTC2_VLINE_INT_EN		(1 << 14)	/* LT Pro */
+#	define MACH64_CRTC2_VLINE_INT			(1 << 15)	/* LT Pro */
+#	define MACH64_CRTC_CAPBUF0_INT_EN		(1 << 16)
+#	define MACH64_CRTC_CAPBUF0_INT			(1 << 17)
+#	define MACH64_CRTC_CAPBUF1_INT_EN		(1 << 18)
+#	define MACH64_CRTC_CAPBUF1_INT			(1 << 19)
+#	define MACH64_CRTC_OVERLAY_EOF_INT_EN		(1 << 20)
+#	define MACH64_CRTC_OVERLAY_EOF_INT		(1 << 21)
+#	define MACH64_CRTC_ONESHOT_CAP_INT_EN		(1 << 22)
+#	define MACH64_CRTC_ONESHOT_CAP_INT		(1 << 23)
+#	define MACH64_CRTC_BUSMASTER_EOL_INT_EN		(1 << 24)
+#	define MACH64_CRTC_BUSMASTER_EOL_INT		(1 << 25)
+#	define MACH64_CRTC_GP_INT_EN			(1 << 26)
+#	define MACH64_CRTC_GP_INT			(1 << 27)
+#	define MACH64_CRTC2_VLINE_SYNC			(1 << 28) /* LT Pro */	/* 0=even, 1=odd */
+#	define MACH64_CRTC_SNAPSHOT2_INT_EN		(1 << 29)	/* LT Pro */
+#	define MACH64_CRTC_SNAPSHOT2_INT		(1 << 30)	/* LT Pro */
+#	define MACH64_CRTC_VBLANK2_INT			(1U << 31)	/* unsigned: shifting a signed 1 into bit 31 is undefined */
+#	define MACH64_CRTC_INT_ENS				\
+		(						\
+			MACH64_CRTC_VBLANK_INT_EN |		\
+			MACH64_CRTC_VLINE_INT_EN |		\
+			MACH64_CRTC_SNAPSHOT_INT_EN |		\
+			MACH64_CRTC_I2C_INT_EN |		\
+			MACH64_CRTC2_VBLANK_INT_EN |		\
+			MACH64_CRTC2_VLINE_INT_EN |		\
+			MACH64_CRTC_CAPBUF0_INT_EN |		\
+			MACH64_CRTC_CAPBUF1_INT_EN |		\
+			MACH64_CRTC_OVERLAY_EOF_INT_EN |	\
+			MACH64_CRTC_ONESHOT_CAP_INT_EN |	\
+			MACH64_CRTC_BUSMASTER_EOL_INT_EN |	\
+			MACH64_CRTC_GP_INT_EN |			\
+			MACH64_CRTC_SNAPSHOT2_INT_EN |		\
+			0					\
+		)
+#	define MACH64_CRTC_INT_ACKS			\
+		(					\
+			MACH64_CRTC_VBLANK_INT |	\
+			MACH64_CRTC_VLINE_INT |		\
+			MACH64_CRTC_SNAPSHOT_INT |	\
+			MACH64_CRTC_I2C_INT |		\
+			MACH64_CRTC2_VBLANK_INT |	\
+			MACH64_CRTC2_VLINE_INT |	\
+			MACH64_CRTC_CAPBUF0_INT |	\
+			MACH64_CRTC_CAPBUF1_INT |	\
+			MACH64_CRTC_OVERLAY_EOF_INT |	\
+			MACH64_CRTC_ONESHOT_CAP_INT |	\
+			MACH64_CRTC_BUSMASTER_EOL_INT |	\
+			MACH64_CRTC_GP_INT |		\
+			MACH64_CRTC_SNAPSHOT2_INT |	\
+			MACH64_CRTC_VBLANK2_INT |	\
+			0				\
+		)
+
+#define MACH64_DATATYPE_CI8				2
+#define MACH64_DATATYPE_ARGB1555			3
+#define MACH64_DATATYPE_RGB565				4
+#define MACH64_DATATYPE_ARGB8888			6
+#define MACH64_DATATYPE_RGB332				7
+#define MACH64_DATATYPE_Y8				8
+#define MACH64_DATATYPE_RGB8				9
+#define MACH64_DATATYPE_VYUY422				11
+#define MACH64_DATATYPE_YVYU422				12
+#define MACH64_DATATYPE_AYUV444				14
+#define MACH64_DATATYPE_ARGB4444			15
+
+#define MACH64_READ(reg)	DRM_READ32(dev_priv->mmio, (reg) )	/* requires a variable named dev_priv in the caller's scope */
+#define MACH64_WRITE(reg,val)	DRM_WRITE32(dev_priv->mmio, (reg), (val) )	/* same hidden dev_priv dependency */
+
+#define DWMREG0		0x0400	/* register offsets 0x0400-0x07ff map to DMA indices 0x000-0x0ff */
+#define DWMREG0_END	0x07ff
+#define DWMREG1		0x0000	/* register offsets 0x0000-0x03ff map to DMA indices 0x100-0x1ff */
+#define DWMREG1_END	0x03ff
+
+#define ISREG0(r)	(((r) >= DWMREG0) && ((r) <= DWMREG0_END))
+#define DMAREG0(r)	(((r) - DWMREG0) >> 2)	/* byte offset -> dword index */
+#define DMAREG1(r)	((((r) - DWMREG1) >> 2 ) | 0x0100)	/* dword index with bit 8 set */
+#define DMAREG(r)	(ISREG0(r) ? DMAREG0(r) : DMAREG1(r))	/* expands r more than once: pass side-effect-free arguments */
+
+#define MMREG0		0x0000	/* DMA indices 0x00-0xff are the ones produced by DMAREG0() */
+#define MMREG0_END	0x00ff
+
+#define ISMMREG0(r)	(((r) >= MMREG0) && ((r) <= MMREG0_END))
+#define MMSELECT0(r)	(((r) << 2) + DWMREG0)	/* undoes DMAREG0(): index -> byte offset in 0x0400.. */
+#define MMSELECT1(r)	(((((r) & 0xff) << 2) + DWMREG1))	/* undoes DMAREG1(): drops bit 8, index -> byte offset */
+#define MMSELECT(r)	(ISMMREG0(r) ? MMSELECT0(r) : MMSELECT1(r))	/* inverse of DMAREG(); expands r more than once */
+
+/* ================================================================
+ * DMA constants
+ */
+
+/* DMA descriptor field indices:
+ * The descriptor fields are loaded into the read-only
+ * BM_* system bus master registers during a bus-master operation
+ */
+#define MACH64_DMA_FRAME_BUF_OFFSET	0	/* BM_FRAME_BUF_OFFSET */
+#define MACH64_DMA_SYS_MEM_ADDR		1	/* BM_SYSTEM_MEM_ADDR */
+#define MACH64_DMA_COMMAND		2	/* BM_COMMAND */
+#define MACH64_DMA_RESERVED		3	/* BM_STATUS */
+
+/* BM_COMMAND descriptor field flags */
+#define MACH64_DMA_HOLD_OFFSET		(1<<30)	/* Don't increment DMA_FRAME_BUF_OFFSET */
+#define MACH64_DMA_EOL			(1U<<31)	/* End of descriptor list flag (unsigned: bit 31 does not fit a signed shift) */
+
+#define MACH64_DMA_CHUNKSIZE	        0x1000	/* 4kB per DMA descriptor */
+#define MACH64_APERTURE_OFFSET	        0x7ff800	/* frame-buffer offset for gui-masters */
+
+/* ================================================================
+ * Ring operations
+ *
+ * Since the Mach64 bus master engine requires polling, these functions end
+ * up being called frequently, hence being inline.
+ */
+
+static __inline__ void mach64_ring_start(drm_mach64_private_t * dev_priv)
+{
+	drm_mach64_descriptor_ring_t *ring = &dev_priv->ring;
+	u32 bus_cntl;
+
+	DRM_DEBUG("head_addr: 0x%08x head: %d tail: %d space: %d\n",
+		  ring->head_addr, ring->head, ring->tail, ring->space);
+
+	/* an engine that fails to go idle is reset before the ring is started */
+	if (mach64_do_wait_for_idle(dev_priv) < 0)
+		mach64_do_engine_reset(dev_priv);
+
+	if (dev_priv->driver_mode != MACH64_MODE_MMIO) {
+		/* clear the bus-master disable bit, enable block 1 registers */
+		bus_cntl = MACH64_READ(MACH64_BUS_CNTL);
+		bus_cntl &= ~MACH64_BUS_MASTER_DIS;
+		bus_cntl |= MACH64_BUS_EXT_REG_EN;
+		MACH64_WRITE(MACH64_BUS_CNTL, bus_cntl);
+		mach64_do_wait_for_idle(dev_priv);
+	}
+
+	/* point the descriptor table register at the current ring head */
+	MACH64_WRITE(MACH64_BM_GUI_TABLE_CMD,
+		     ring->head_addr | MACH64_CIRCULAR_BUF_SIZE_16KB);
+	dev_priv->ring_running = 1;
+}
+
+static __inline__ void mach64_ring_resume(drm_mach64_private_t * dev_priv,
+					  drm_mach64_descriptor_ring_t * ring)
+{
+	DRM_DEBUG("head_addr: 0x%08x head: %d tail: %d space: %d\n",
+		  ring->head_addr, ring->head, ring->tail, ring->space);
+
+	/* reload the descriptor table register with the current ring head */
+	MACH64_WRITE(MACH64_BM_GUI_TABLE_CMD,
+		     ring->head_addr | MACH64_CIRCULAR_BUF_SIZE_16KB);
+
+	if (dev_priv->driver_mode == MACH64_MODE_MMIO) {
+		mach64_do_dispatch_pseudo_dma(dev_priv);
+		return;
+	}
+
+	/* turn on GUI bus mastering with the bus master synced to the GUI */
+	MACH64_WRITE(MACH64_SRC_CNTL,
+		     MACH64_SRC_BM_ENABLE | MACH64_SRC_BM_SYNC |
+		     MACH64_SRC_BM_OP_SYSTEM_TO_REG);
+	/* this write starts the transfer */
+	MACH64_WRITE(MACH64_DST_HEIGHT_WIDTH, 0);
+	if (dev_priv->driver_mode != MACH64_MODE_DMA_SYNC)
+		return;
+	if ((mach64_do_wait_for_idle(dev_priv)) < 0) {
+		DRM_ERROR("idle failed, resetting engine\n");
+		mach64_dump_engine_info(dev_priv);
+		mach64_do_engine_reset(dev_priv);
+		return;
+	}
+	mach64_do_release_used_buffers(dev_priv);
+}
+
+/**
+ * Poll the ring head and make sure the bus master is alive.
+ *
+ * Mach64's bus master engine will stop if there are no more entries to process.
+ * This function polls the engine for the last processed entry and calls
+ * mach64_ring_resume if there is an unprocessed entry.
+ *
+ * Note also that, since we update the ring tail while the bus master engine is
+ * in operation, it is possible that the last tail update was too late to be
+ * processed, and the bus master engine stops at the previous tail position.
+ * Therefore it is important to call this function frequently.
+ */
+static __inline__ void mach64_ring_tick(drm_mach64_private_t * dev_priv,
+					drm_mach64_descriptor_ring_t * ring)
+{
+	int gui_active;
+
+	DRM_DEBUG("head_addr: 0x%08x head: %d tail: %d space: %d\n",
+		  ring->head_addr, ring->head, ring->tail, ring->space);
+
+	if (!dev_priv->ring_running) {
+		/* ring not running: start it, then run anything already queued */
+		mach64_ring_start(dev_priv);
+		if (ring->head != ring->tail)
+			mach64_ring_resume(dev_priv, ring);
+		return;
+	}
+
+	/* GUI_ACTIVE must be sampled before BM_GUI_TABLE, otherwise the
+	 * ring head cannot be determined correctly
+	 */
+	gui_active = MACH64_READ(MACH64_GUI_STAT) & MACH64_GUI_ACTIVE;
+	ring->head_addr = MACH64_READ(MACH64_BM_GUI_TABLE) & 0xfffffff0;
+
+	if (gui_active) {
+		/* While the engine is busy BM_GUI_TABLE points one descriptor
+		 * past the current head: step back 4 dwords, wrapping from
+		 * the start of the ring to its end.
+		 */
+		if (ring->head_addr == ring->start_addr)
+			ring->head_addr += ring->size;
+		ring->head_addr -= 4 * sizeof(u32);
+	}
+
+	if (ring->head_addr < ring->start_addr ||
+	    ring->head_addr >= ring->start_addr + ring->size) {
+		DRM_ERROR("bad ring head address: 0x%08x\n",
+			  ring->head_addr);
+		mach64_dump_ring_info(dev_priv);
+		mach64_do_engine_reset(dev_priv);
+		return;
+	}
+
+	ring->head = (ring->head_addr - ring->start_addr) / sizeof(u32);
+
+	/* engine is idle but descriptors are still queued: restart it */
+	if (!gui_active && ring->head != ring->tail)
+		mach64_ring_resume(dev_priv, ring);
+}
+
+static __inline__ void mach64_ring_stop(drm_mach64_private_t * dev_priv)
+{
+	drm_mach64_descriptor_ring_t *ring = &dev_priv->ring;
+
+	DRM_DEBUG("head_addr: 0x%08x head: %d tail: %d space: %d\n",
+		  ring->head_addr, ring->head, ring->tail, ring->space);
+
+	/* wait for one FIFO entry, then clear SRC_CNTL to disable busmastering */
+	mach64_do_wait_for_fifo(dev_priv, 1);
+	MACH64_WRITE(MACH64_SRC_CNTL, 0);
+
+	/* after the idle wait, set master-disable; block 1 registers stay enabled */
+	mach64_do_wait_for_idle(dev_priv);
+	MACH64_WRITE(MACH64_BUS_CNTL, MACH64_READ(MACH64_BUS_CNTL)
+		     | MACH64_BUS_MASTER_DIS | MACH64_BUS_EXT_REG_EN);
+	dev_priv->ring_running = 0;
+}
+
+/* Poll the hardware head (mach64_ring_tick) and recompute ring->space. */
+static __inline__ void
+mach64_update_ring_snapshot(drm_mach64_private_t * dev_priv)
+{
+	drm_mach64_descriptor_ring_t *ring = &dev_priv->ring;
+	int free_bytes;
+
+	DRM_DEBUG("\n");
+	mach64_ring_tick(dev_priv, ring);
+
+	/* head == tail yields 0 here and is reported as ring->size bytes free */
+	free_bytes = (ring->head - ring->tail) * sizeof(u32);
+	ring->space = (free_bytes <= 0) ? free_bytes + ring->size : free_bytes;
+}
+
+/* ================================================================
+ * DMA macros
+ *
+ * Mach64's ring buffer doesn't take register writes directly. These
+ * have to be written indirectly in DMA buffers. These macros simplify
+ * the task of setting up a buffer, writing commands to it, and
+ * queuing the buffer in the ring.
+ */
+
+#define DMALOCALS	/* declares the locals used by the DMA* macros */	\
+	drm_mach64_freelist_t *_entry = NULL;	\
+	struct drm_buf *_buf = NULL;		\
+	u32 *_buf_wptr; int _outcount	/* no trailing ';': the use site supplies it */
+
+#define GETBUFPTR( __buf )	/* CPU pointer to the buffer data; needs dev_priv in scope */ \
+((dev_priv->is_pci) ?							\
+	((u32 *)(__buf)->address) :					\
+	((u32 *)((char *)dev_priv->dev_buffers->handle + (__buf)->offset)))
+
+#define GETBUFADDR( __buf ) ((u32)(__buf)->bus_address)	/* the buffer's bus address as a u32 */
+
+#define GETRINGOFFSET() (_entry->ring_ofs)	/* reads the DMALOCALS _entry, so _entry must be non-NULL */
+
+/* Find the pending-list entry owning buf, newest first.  The list head is never
+ * treated as an entry; returns -EFAULT (*entry untouched) if buf is not pending. */
+static __inline__ int mach64_find_pending_buf_entry(drm_mach64_private_t *
+						    dev_priv,
+						    drm_mach64_freelist_t **
+						    entry, struct drm_buf * buf)
+{
+	struct list_head *ptr;
+#if MACH64_EXTRA_CHECKING
+	if (list_empty(&dev_priv->pending)) {
+		DRM_ERROR("Empty pending list in \n");
+		return -EINVAL;
+	}
+#endif
+	list_for_each_prev(ptr, &dev_priv->pending) {
+		drm_mach64_freelist_t *e = list_entry(ptr, drm_mach64_freelist_t, list);
+		if (e->buf == buf) {
+			*entry = e;
+			return 0;
+		}
+	}
+	return -EFAULT;
+}
+
+#define DMASETPTR( _p )	/* aim the DMALOCALS cursor at the start of buffer _p */ \
+do {						\
+	_buf = (_p);				\
+	_outcount = 0;				\
+	_buf_wptr = GETBUFPTR( _buf );		\
+} while(0)
+
+/* FIXME: use a private set of smaller buffers for state emits, clears, and swaps? */
+#define DMAGETPTR( file_priv, dev_priv, n )	/* needs DMALOCALS; can return from the enclosing function */ \
+do {									\
+	if ( MACH64_VERBOSE ) {						\
+		DRM_INFO( "DMAGETPTR( %d )\n", (n) );			\
+	}								\
+	_buf = mach64_freelist_get( dev_priv );				\
+	if (_buf == NULL) {						\
+		DRM_ERROR("couldn't get buffer in DMAGETPTR\n");	\
+		return -EAGAIN;					\
+	}								\
+	if (_buf->pending) {						\
+	        DRM_ERROR("pending buf in DMAGETPTR\n");		\
+		return -EFAULT;					\
+	}								\
+	_buf->file_priv = file_priv;					\
+	_outcount = 0;							\
+	/* n is used only by the verbose message above */		\
+        _buf_wptr = GETBUFPTR( _buf );					\
+} while (0)
+
+#define DMAOUTREG( reg, val )	/* append a (DMAREG index, value) dword pair; no bounds check here */ \
+do {								\
+	if ( MACH64_VERBOSE ) {					\
+		DRM_INFO( "   DMAOUTREG( 0x%x = 0x%08x )\n",	\
+			  reg, val );				\
+	}							\
+	_buf_wptr[_outcount++] = cpu_to_le32(DMAREG(reg));	\
+	_buf_wptr[_outcount++] = cpu_to_le32((val));		\
+	_buf->used += 8;	/* two dwords were written */	\
+} while (0)
+
+#define DMAADVANCE( dev_priv, _discard )	/* queue _buf on the ring; needs DMALOCALS */ \
+	do {								\
+		struct list_head *ptr;					\
+		int ret;						\
+		/* every error path below returns from the enclosing function */ \
+		if ( MACH64_VERBOSE ) {					\
+			DRM_INFO( "DMAADVANCE() in \n" );		\
+		}							\
+									\
+		if (_buf->used <= 0) {					\
+			DRM_ERROR( "DMAADVANCE(): sending empty buf %d\n", \
+				   _buf->idx );				\
+			return -EFAULT;					\
+		}							\
+		if (_buf->pending) {					\
+			/* This is a reused buffer, so we need to find it in the pending list */ \
+			if ((ret = mach64_find_pending_buf_entry(dev_priv, &_entry, _buf))) { \
+				DRM_ERROR( "DMAADVANCE(): couldn't find pending buf %d\n", _buf->idx );	\
+				return ret;				\
+			}						\
+			if (_entry->discard) {				\
+				DRM_ERROR( "DMAADVANCE(): sending discarded pending buf %d\n", _buf->idx ); \
+				return -EFAULT;				\
+			}						\
+		} else {	/* not yet pending: move a placeholder entry to the pending list */ \
+			if (list_empty(&dev_priv->placeholders)) {	\
+				DRM_ERROR( "DMAADVANCE(): empty placeholder list\n"); \
+				return -EFAULT;				\
+			}						\
+			ptr = dev_priv->placeholders.next;		\
+			list_del(ptr);					\
+			_entry = list_entry(ptr, drm_mach64_freelist_t, list); \
+			_buf->pending = 1;				\
+			_entry->buf = _buf;				\
+			list_add_tail(ptr, &dev_priv->pending);		\
+		}							\
+		_entry->discard = (_discard);				\
+		if ((ret = mach64_add_buf_to_ring( dev_priv, _entry ))) \
+			return ret;					\
+	} while (0)
+
+#define DMADISCARDBUF()	/* set discard on _buf's entry; needs DMALOCALS and dev_priv */ \
+	do {								\
+		if (_entry == NULL) {	/* entry not known yet: look it up in the pending list */ \
+			int ret;					\
+			if ((ret = mach64_find_pending_buf_entry(dev_priv, &_entry, _buf))) { \
+				DRM_ERROR( "couldn't find pending buf %d\n", \
+					   _buf->idx );			\
+				return ret;	/* returns from the enclosing function */ \
+			}						\
+		}							\
+		_entry->discard = 1;					\
+	} while(0)
+
+#define DMAADVANCEHOSTDATA( dev_priv )	/* queue _buf via the hostdata ring path; needs DMALOCALS */ \
+	do {								\
+		struct list_head *ptr;					\
+		int ret;						\
+		/* every error path below returns from the enclosing function */ \
+		if ( MACH64_VERBOSE ) {					\
+			DRM_INFO( "DMAADVANCEHOSTDATA() in \n" );	\
+		}							\
+									\
+		if (_buf->used <= 0) {					\
+			DRM_ERROR( "DMAADVANCEHOSTDATA(): sending empty buf %d\n", _buf->idx );	\
+			return -EFAULT;					\
+		}							\
+		if (list_empty(&dev_priv->placeholders)) {		\
+			DRM_ERROR( "empty placeholder list in DMAADVANCEHOSTDATA()\n" ); \
+			return -EFAULT;					\
+		}							\
+		/* unlike DMAADVANCE, always takes a fresh placeholder entry */ \
+		ptr = dev_priv->placeholders.next;			\
+		list_del(ptr);						\
+		_entry = list_entry(ptr, drm_mach64_freelist_t, list);	\
+		_entry->buf = _buf;					\
+		_entry->buf->pending = 1;				\
+		list_add_tail(ptr, &dev_priv->pending);			\
+		_entry->discard = 1;	/* always marked discard */	\
+		if ((ret = mach64_add_hostdata_buf_to_ring( dev_priv, _entry ))) \
+			return ret;					\
+	} while (0)
+
+#endif				/* __MACH64_DRV_H__ */
--- /dev/null
+++ b/drivers/gpu/drm/mach64/mach64_irq.c
@@ -0,0 +1,159 @@
+/* mach64_irq.c -- IRQ handling for ATI Mach64 -*- linux-c -*-
+ * Created: Tue Feb 25, 2003 by Leif Delgass, based on radeon_irq.c/r128_irq.c
+ */
+/*-
+ * Copyright (C) The Weather Channel, Inc.  2002.
+ * Copyright 2003 Leif Delgass
+ * All Rights Reserved.
+ *
+ * The Weather Channel (TM) funded Tungsten Graphics to develop the
+ * initial release of the Radeon 8500 driver under the XFree86 license.
+ * This notice must be preserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Keith Whitwell <keith@tungstengraphics.com>
+ *    Eric Anholt <anholt@FreeBSD.org>
+ *    Leif Delgass <ldelgass@retinalburn.net>
+ */
+
+#include "drmP.h"
+#include "drm.h"
+#include "mach64_drm.h"
+#include "mach64_drv.h"
+
+irqreturn_t mach64_driver_irq_handler(DRM_IRQ_ARGS)
+{
+	struct drm_device *dev = arg;
+	drm_mach64_private_t *dev_priv = dev->dev_private;
+	int int_cntl = MACH64_READ(MACH64_CRTC_INT_CNTL);
+	int ack;
+
+	/* Only the VBLANK interrupt is serviced here; for anything else
+	 * report that the interrupt was not handled by this driver.
+	 */
+	if (!(int_cntl & MACH64_CRTC_VBLANK_INT))
+		return IRQ_NONE;
+
+	/* Mask off all interrupt ack bits before setting the VBLANK ack bit,
+	 * since there may be other handlers outside the DRM.
+	 *
+	 * NOTE: On mach64, you need to keep the enable bits set when doing
+	 * the ack, despite what the docs say about not acking and enabling
+	 * in a single write.
+	 */
+	ack = (int_cntl & ~MACH64_CRTC_INT_ACKS) | MACH64_CRTC_VBLANK_INT;
+	MACH64_WRITE(MACH64_CRTC_INT_CNTL, ack);
+
+	atomic_inc(&dev_priv->vbl_received);
+	drm_handle_vblank(dev, 0);
+	return IRQ_HANDLED;
+}
+
+u32 mach64_get_vblank_counter(struct drm_device * dev, int crtc)
+{
+	const drm_mach64_private_t *const dev_priv = dev->dev_private;
+
+	/* Only CRTC 0 has a counter; any other index reads as zero. */
+	if (crtc == 0)
+		return atomic_read(&dev_priv->vbl_received);
+	return 0;
+}
+
+int mach64_enable_vblank(struct drm_device * dev, int crtc)
+{
+	drm_mach64_private_t *dev_priv = dev->dev_private;
+	u32 status = MACH64_READ(MACH64_CRTC_INT_CNTL);	/* for the debug print only */
+
+	if (crtc != 0) {	/* only CRTC 0 is accepted */
+		DRM_ERROR("tried to enable vblank on non-existent crtc %d\n",
+			  crtc);
+		return -EINVAL;
+	}
+
+	DRM_DEBUG("before enable vblank CRTC_INT_CTNL: 0x%08x\n", status);
+	/* Turn on VBLANK interrupt by OR-ing the enable bit into a fresh read.
+	 * NOTE(review): unlike the IRQ handler, ack bits are not masked - confirm. */
+	MACH64_WRITE(MACH64_CRTC_INT_CNTL, MACH64_READ(MACH64_CRTC_INT_CNTL)
+		     | MACH64_CRTC_VBLANK_INT_EN);
+
+	return 0;
+}
+
+void mach64_disable_vblank(struct drm_device * dev, int crtc)
+{
+	if (crtc != 0) {
+		DRM_ERROR("tried to disable vblank on non-existent crtc %d\n",
+			  crtc);
+		return;
+	}
+	/*
+	 * Beyond the crtc check this is a no-op; no register is written.
+	 * FIXME: implement proper interrupt disable by using the vblank
+	 * counter register (if available).
+	 */
+}
+
+static void mach64_disable_vblank_local(struct drm_device * dev, int crtc)
+{
+	drm_mach64_private_t *dev_priv = dev->dev_private;
+	u32 status = MACH64_READ(MACH64_CRTC_INT_CNTL);	/* read before crtc is validated */
+
+	if (crtc != 0) {
+		DRM_ERROR("tried to disable vblank on non-existent crtc %d\n",
+			  crtc);
+		return;
+	}
+
+	DRM_DEBUG("before disable vblank CRTC_INT_CTNL: 0x%08x\n", status);
+	/* Disable and clear VBLANK interrupt in one write: the enable bit is
+	 * dropped from the value read above and the VBLANK ack bit is set. */
+	MACH64_WRITE(MACH64_CRTC_INT_CNTL, (status & ~MACH64_CRTC_VBLANK_INT_EN)
+		     | MACH64_CRTC_VBLANK_INT);
+}
+
+void mach64_driver_irq_preinstall(struct drm_device * dev)
+{
+	drm_mach64_private_t *dev_priv = dev->dev_private;
+	u32 int_cntl;
+
+	/* Log the register as found, then disable and clear VBLANK on CRTC 0. */
+	int_cntl = MACH64_READ(MACH64_CRTC_INT_CNTL);
+	DRM_DEBUG("before install CRTC_INT_CTNL: 0x%08x\n", int_cntl);
+	mach64_disable_vblank_local(dev, 0);
+}
+
+int mach64_driver_irq_postinstall(struct drm_device * dev)
+{
+	return drm_vblank_init(dev, 1);	/* 1: presumably the CRTC count - confirm */
+}
+
+void mach64_driver_irq_uninstall(struct drm_device * dev)
+{
+	drm_mach64_private_t *dev_priv = dev->dev_private;
+
+	/* Without private data there is nothing to switch off. */
+	if (dev_priv) {
+		mach64_disable_vblank_local(dev, 0);
+		DRM_DEBUG("after uninstall CRTC_INT_CTNL: 0x%08x\n",
+			  MACH64_READ(MACH64_CRTC_INT_CNTL));
+	}
+}
--- /dev/null
+++ b/drivers/gpu/drm/mach64/mach64_state.c
@@ -0,0 +1,910 @@
+/* mach64_state.c -- State support for mach64 (Rage Pro) driver -*- linux-c -*-
+ * Created: Sun Dec 03 19:20:26 2000 by gareth@valinux.com
+ */
+/*
+ * Copyright 2000 Gareth Hughes
+ * Copyright 2002-2003 Leif Delgass
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT OWNER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Gareth Hughes <gareth@valinux.com>
+ *    Leif Delgass <ldelgass@retinalburn.net>
+ *    José Fonseca <j_r_fonseca@yahoo.co.uk>
+ */
+
+#include "drmP.h"
+#include "drm.h"
+#include "mach64_drm.h"
+#include "mach64_drv.h"
+
+/* Driver-private ioctl table.  Every entry is flagged DRM_AUTH; only
+ * DRM_MACH64_INIT is additionally flagged DRM_MASTER and DRM_ROOT_ONLY.
+ *
+ * Interface history: 1.0 - Initial mach64 DRM
+ */
+struct drm_ioctl_desc mach64_ioctls[] = {
+	DRM_IOCTL_DEF(DRM_MACH64_INIT, mach64_dma_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
+	DRM_IOCTL_DEF(DRM_MACH64_CLEAR, mach64_dma_clear, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_MACH64_SWAP, mach64_dma_swap, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_MACH64_IDLE, mach64_dma_idle, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_MACH64_RESET, mach64_engine_reset, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_MACH64_VERTEX, mach64_dma_vertex, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_MACH64_BLIT, mach64_dma_blit, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_MACH64_FLUSH, mach64_dma_flush, DRM_AUTH),
+	DRM_IOCTL_DEF(DRM_MACH64_GETPARAM, mach64_get_param, DRM_AUTH),
+};
+
+int mach64_max_ioctl = DRM_ARRAY_SIZE(mach64_ioctls);	/* entries in the table above */
+
+/* ================================================================
+ * DMA hardware state programming functions
+ */
+
+static void mach64_print_dirty(const char *msg, unsigned int flags)
+{
+	/* Yields the label when its upload bit is set in flags, "" otherwise */
+#define DIRTY_STR(bit, label)	((flags & (bit)) ? (label) : "")
+
+	DRM_DEBUG("%s: (0x%x) %s%s%s%s%s%s%s%s%s%s%s%s\n", msg, flags,
+		  DIRTY_STR(MACH64_UPLOAD_DST_OFF_PITCH, "dst_off_pitch, "),
+		  DIRTY_STR(MACH64_UPLOAD_Z_ALPHA_CNTL, "z_alpha_cntl, "),
+		  DIRTY_STR(MACH64_UPLOAD_SCALE_3D_CNTL, "scale_3d_cntl, "),
+		  DIRTY_STR(MACH64_UPLOAD_DP_FOG_CLR, "dp_fog_clr, "),
+		  DIRTY_STR(MACH64_UPLOAD_DP_WRITE_MASK, "dp_write_mask, "),
+		  DIRTY_STR(MACH64_UPLOAD_DP_PIX_WIDTH, "dp_pix_width, "),
+		  DIRTY_STR(MACH64_UPLOAD_SETUP_CNTL, "setup_cntl, "),
+		  DIRTY_STR(MACH64_UPLOAD_MISC, "misc, "),
+		  DIRTY_STR(MACH64_UPLOAD_TEXTURE, "texture, "),
+		  DIRTY_STR(MACH64_UPLOAD_TEX0IMAGE, "tex0 image, "),
+		  DIRTY_STR(MACH64_UPLOAD_TEX1IMAGE, "tex1 image, "),
+		  DIRTY_STR(MACH64_UPLOAD_CLIPRECTS, "cliprects, "));
+#undef DIRTY_STR
+}
+
+/* Mach64 doesn't have hardware cliprects, just one hardware scissor,
+ * so the GL scissor is intersected with each cliprect here and the result
+ * is emitted as SC_LEFT_RIGHT / SC_TOP_BOTTOM register writes.
+ * This function returns 0 on success, 1 for no intersection (nothing is
+ * emitted), and negative for an error
+ */
+static int mach64_emit_cliprect(struct drm_file *file_priv,
+				drm_mach64_private_t * dev_priv,
+				struct drm_clip_rect * box)
+{
+	u32 sc_left_right, sc_top_bottom;
+	struct drm_clip_rect scissor;
+	drm_mach64_sarea_t *sarea_priv = dev_priv->sarea_priv;
+	drm_mach64_context_regs_t *regs = &sarea_priv->context_state;
+	DMALOCALS;
+
+	DRM_DEBUG("box=%p\n", box);
+
+	/* Get GL scissor: each register value packs two 16-bit edges */
+	/* FIXME: store scissor in SAREA as a cliprect instead of in
+	 * hardware format, or do intersection client-side
+	 */
+	scissor.x1 = regs->sc_left_right & 0xffff;	/* low half */
+	scissor.x2 = (regs->sc_left_right & 0xffff0000) >> 16;	/* high half */
+	scissor.y1 = regs->sc_top_bottom & 0xffff;	/* low half */
+	scissor.y2 = (regs->sc_top_bottom & 0xffff0000) >> 16;	/* high half */
+
+	/* Intersect GL scissor with cliprect: keep the innermost edges */
+	if (box->x1 > scissor.x1)
+		scissor.x1 = box->x1;
+	if (box->y1 > scissor.y1)
+		scissor.y1 = box->y1;
+	if (box->x2 < scissor.x2)
+		scissor.x2 = box->x2;
+	if (box->y2 < scissor.y2)
+		scissor.y2 = box->y2;
+	/* positive return means skip: the intersection is empty */
+	if (scissor.x1 >= scissor.x2)
+		return 1;
+	if (scissor.y1 >= scissor.y2)
+		return 1;
+
+	DMAGETPTR(file_priv, dev_priv, 2);	/* returns on failure to get buffer */
+
+	sc_left_right = ((scissor.x1 << 0) | (scissor.x2 << 16));
+	sc_top_bottom = ((scissor.y1 << 0) | (scissor.y2 << 16));
+
+	DMAOUTREG(MACH64_SC_LEFT_RIGHT, sc_left_right);
+	DMAOUTREG(MACH64_SC_TOP_BOTTOM, sc_top_bottom);
+
+	DMAADVANCE(dev_priv, 1);	/* queue the buffer with its discard flag set */
+
+	return 0;
+}
+
+static __inline__ int mach64_emit_state(struct drm_file *file_priv,
+					drm_mach64_private_t * dev_priv)
+{
+	drm_mach64_sarea_t *sarea_priv = dev_priv->sarea_priv;
+	drm_mach64_context_regs_t *regs = &sarea_priv->context_state;
+	unsigned int dirty = sarea_priv->dirty;	/* snapshot tested below */
+	u32 offset = ((regs->tex_size_pitch & 0xf0) >> 2);	/* added to TEX_0_OFF below */
+	DMALOCALS;
+	/* Emit one DMA buffer rewriting each register group flagged in dirty */
+	if (MACH64_VERBOSE) {
+		mach64_print_dirty(__FUNCTION__, dirty);
+	} else {
+		DRM_DEBUG("dirty=0x%08x\n", dirty);
+	}
+
+	DMAGETPTR(file_priv, dev_priv, 17);	/* 17 writes at most; returns on failure to get buffer */
+
+	if (dirty & MACH64_UPLOAD_MISC) {
+		DMAOUTREG(MACH64_DP_MIX, regs->dp_mix);
+		DMAOUTREG(MACH64_DP_SRC, regs->dp_src);
+		DMAOUTREG(MACH64_CLR_CMP_CNTL, regs->clr_cmp_cntl);
+		DMAOUTREG(MACH64_GUI_TRAJ_CNTL, regs->gui_traj_cntl);
+		sarea_priv->dirty &= ~MACH64_UPLOAD_MISC;
+	}
+
+	if (dirty & MACH64_UPLOAD_DST_OFF_PITCH) {
+		DMAOUTREG(MACH64_DST_OFF_PITCH, regs->dst_off_pitch);
+		sarea_priv->dirty &= ~MACH64_UPLOAD_DST_OFF_PITCH;
+	}
+	if (dirty & MACH64_UPLOAD_Z_OFF_PITCH) {
+		DMAOUTREG(MACH64_Z_OFF_PITCH, regs->z_off_pitch);
+		sarea_priv->dirty &= ~MACH64_UPLOAD_Z_OFF_PITCH;
+	}
+	if (dirty & MACH64_UPLOAD_Z_ALPHA_CNTL) {
+		DMAOUTREG(MACH64_Z_CNTL, regs->z_cntl);
+		DMAOUTREG(MACH64_ALPHA_TST_CNTL, regs->alpha_tst_cntl);
+		sarea_priv->dirty &= ~MACH64_UPLOAD_Z_ALPHA_CNTL;
+	}
+	if (dirty & MACH64_UPLOAD_SCALE_3D_CNTL) {
+		DMAOUTREG(MACH64_SCALE_3D_CNTL, regs->scale_3d_cntl);
+		sarea_priv->dirty &= ~MACH64_UPLOAD_SCALE_3D_CNTL;
+	}
+	if (dirty & MACH64_UPLOAD_DP_FOG_CLR) {
+		DMAOUTREG(MACH64_DP_FOG_CLR, regs->dp_fog_clr);
+		sarea_priv->dirty &= ~MACH64_UPLOAD_DP_FOG_CLR;
+	}
+	if (dirty & MACH64_UPLOAD_DP_WRITE_MASK) {
+		DMAOUTREG(MACH64_DP_WRITE_MASK, regs->dp_write_mask);
+		sarea_priv->dirty &= ~MACH64_UPLOAD_DP_WRITE_MASK;
+	}
+	if (dirty & MACH64_UPLOAD_DP_PIX_WIDTH) {
+		DMAOUTREG(MACH64_DP_PIX_WIDTH, regs->dp_pix_width);
+		sarea_priv->dirty &= ~MACH64_UPLOAD_DP_PIX_WIDTH;
+	}
+	if (dirty & MACH64_UPLOAD_SETUP_CNTL) {
+		DMAOUTREG(MACH64_SETUP_CNTL, regs->setup_cntl);
+		sarea_priv->dirty &= ~MACH64_UPLOAD_SETUP_CNTL;
+	}
+
+	if (dirty & MACH64_UPLOAD_TEXTURE) {
+		DMAOUTREG(MACH64_TEX_SIZE_PITCH, regs->tex_size_pitch);
+		DMAOUTREG(MACH64_TEX_CNTL, regs->tex_cntl);
+		DMAOUTREG(MACH64_SECONDARY_TEX_OFF, regs->secondary_tex_off);
+		DMAOUTREG(MACH64_TEX_0_OFF + offset, regs->tex_offset);
+		sarea_priv->dirty &= ~MACH64_UPLOAD_TEXTURE;
+	}
+
+	DMAADVANCE(dev_priv, 1);	/* queue the buffer with its discard flag set */
+
+	sarea_priv->dirty &= MACH64_UPLOAD_CLIPRECTS;	/* clears all but the cliprects flag */
+
+	return 0;
+
+}
+
+/* ================================================================
+ * DMA command dispatch functions
+ */
+
+static int mach64_dma_dispatch_clear(struct drm_device * dev,
+				     struct drm_file *file_priv,
+				     unsigned int flags,
+				     int cx, int cy, int cw, int ch,	/* not referenced in the body */
+				     unsigned int clear_color,
+				     unsigned int clear_depth)
+{
+	drm_mach64_private_t *dev_priv = dev->dev_private;
+	drm_mach64_sarea_t *sarea_priv = dev_priv->sarea_priv;
+	drm_mach64_context_regs_t *ctx = &sarea_priv->context_state;
+	int nbox = sarea_priv->nbox;
+	struct drm_clip_rect *pbox = sarea_priv->boxes;
+	u32 fb_bpp, depth_bpp;
+	int i;
+	DMALOCALS;
+
+	DRM_DEBUG("\n");
+	/* Translate both pixel sizes to hardware datatypes, rejecting others */
+	switch (dev_priv->fb_bpp) {
+	case 16:
+		fb_bpp = MACH64_DATATYPE_RGB565;
+		break;
+	case 32:
+		fb_bpp = MACH64_DATATYPE_ARGB8888;
+		break;
+	default:
+		return -EINVAL;
+	}
+	switch (dev_priv->depth_bpp) {
+	case 16:
+		depth_bpp = MACH64_DATATYPE_RGB565;
+		break;
+	case 24:
+	case 32:
+		depth_bpp = MACH64_DATATYPE_ARGB8888;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (!nbox)
+		return 0;
+
+	DMAGETPTR(file_priv, dev_priv, nbox * 31);	/* 31 writes per box at most; returns on failure to get buffer */
+	/* For each cliprect, emit the fills selected by flags (front, back, depth) */
+	for (i = 0; i < nbox; i++) {
+		int x = pbox[i].x1;
+		int y = pbox[i].y1;
+		int w = pbox[i].x2 - x;
+		int h = pbox[i].y2 - y;
+
+		DRM_DEBUG("dispatch clear %d,%d-%d,%d flags 0x%x\n",
+			  pbox[i].x1, pbox[i].y1,
+			  pbox[i].x2, pbox[i].y2, flags);
+
+		if (flags & (MACH64_FRONT | MACH64_BACK)) {
+			/* Setup for color buffer clears (shared by the front
+			 * and back fills below) */
+
+			DMAOUTREG(MACH64_Z_CNTL, 0);
+			DMAOUTREG(MACH64_SCALE_3D_CNTL, 0);
+
+			DMAOUTREG(MACH64_SC_LEFT_RIGHT, ctx->sc_left_right);
+			DMAOUTREG(MACH64_SC_TOP_BOTTOM, ctx->sc_top_bottom);
+
+			DMAOUTREG(MACH64_CLR_CMP_CNTL, 0);
+			DMAOUTREG(MACH64_GUI_TRAJ_CNTL,
+				  (MACH64_DST_X_LEFT_TO_RIGHT |
+				   MACH64_DST_Y_TOP_TO_BOTTOM));
+
+			DMAOUTREG(MACH64_DP_PIX_WIDTH, ((fb_bpp << 0) |
+							(fb_bpp << 4) |
+							(fb_bpp << 8) |
+							(fb_bpp << 16) |
+							(fb_bpp << 28)));
+
+			DMAOUTREG(MACH64_DP_FRGD_CLR, clear_color);
+			DMAOUTREG(MACH64_DP_WRITE_MASK, ctx->dp_write_mask);
+			DMAOUTREG(MACH64_DP_MIX, (MACH64_BKGD_MIX_D |
+						  MACH64_FRGD_MIX_S));
+			DMAOUTREG(MACH64_DP_SRC, (MACH64_BKGD_SRC_FRGD_CLR |
+						  MACH64_FRGD_SRC_FRGD_CLR |
+						  MACH64_MONO_SRC_ONE));
+
+		}
+
+		if (flags & MACH64_FRONT) {
+
+			DMAOUTREG(MACH64_DST_OFF_PITCH,
+				  dev_priv->front_offset_pitch);
+			DMAOUTREG(MACH64_DST_X_Y, (y << 16) | x);
+			DMAOUTREG(MACH64_DST_WIDTH_HEIGHT, (h << 16) | w);
+
+		}
+
+		if (flags & MACH64_BACK) {
+
+			DMAOUTREG(MACH64_DST_OFF_PITCH,
+				  dev_priv->back_offset_pitch);
+			DMAOUTREG(MACH64_DST_X_Y, (y << 16) | x);
+			DMAOUTREG(MACH64_DST_WIDTH_HEIGHT, (h << 16) | w);
+
+		}
+
+		if (flags & MACH64_DEPTH) {
+			/* Setup for depth buffer clear: same sequence as the
+			 * color setup, with depth_bpp and a full write mask */
+			DMAOUTREG(MACH64_Z_CNTL, 0);
+			DMAOUTREG(MACH64_SCALE_3D_CNTL, 0);
+
+			DMAOUTREG(MACH64_SC_LEFT_RIGHT, ctx->sc_left_right);
+			DMAOUTREG(MACH64_SC_TOP_BOTTOM, ctx->sc_top_bottom);
+
+			DMAOUTREG(MACH64_CLR_CMP_CNTL, 0);
+			DMAOUTREG(MACH64_GUI_TRAJ_CNTL,
+				  (MACH64_DST_X_LEFT_TO_RIGHT |
+				   MACH64_DST_Y_TOP_TO_BOTTOM));
+
+			DMAOUTREG(MACH64_DP_PIX_WIDTH, ((depth_bpp << 0) |
+							(depth_bpp << 4) |
+							(depth_bpp << 8) |
+							(depth_bpp << 16) |
+							(depth_bpp << 28)));
+
+			DMAOUTREG(MACH64_DP_FRGD_CLR, clear_depth);
+			DMAOUTREG(MACH64_DP_WRITE_MASK, 0xffffffff);
+			DMAOUTREG(MACH64_DP_MIX, (MACH64_BKGD_MIX_D |
+						  MACH64_FRGD_MIX_S));
+			DMAOUTREG(MACH64_DP_SRC, (MACH64_BKGD_SRC_FRGD_CLR |
+						  MACH64_FRGD_SRC_FRGD_CLR |
+						  MACH64_MONO_SRC_ONE));
+
+			DMAOUTREG(MACH64_DST_OFF_PITCH,
+				  dev_priv->depth_offset_pitch);
+			DMAOUTREG(MACH64_DST_X_Y, (y << 16) | x);
+			DMAOUTREG(MACH64_DST_WIDTH_HEIGHT, (h << 16) | w);
+		}
+	}
+
+	DMAADVANCE(dev_priv, 1);	/* queue the buffer with its discard flag set */
+
+	return 0;
+}
+
+static int mach64_dma_dispatch_swap(struct drm_device * dev,
+				    struct drm_file *file_priv)
+{
+	drm_mach64_private_t *dev_priv = dev->dev_private;
+	drm_mach64_sarea_t *sarea_priv = dev_priv->sarea_priv;
+	int nbox = sarea_priv->nbox;
+	struct drm_clip_rect *pbox = sarea_priv->boxes;
+	u32 fb_bpp;
+	int i;
+	DMALOCALS;
+
+	DRM_DEBUG("\n");
+
+	switch (dev_priv->fb_bpp) {
+	case 16:
+		fb_bpp = MACH64_DATATYPE_RGB565;
+		break;
+	case 32:
+	default:	/* any other depth is treated like 32 bpp */
+		fb_bpp = MACH64_DATATYPE_ARGB8888;
+		break;
+	}
+
+	if (!nbox)
+		return 0;
+
+	DMAGETPTR(file_priv, dev_priv, 13 + nbox * 4);	/* setup plus 4 writes per box; returns on failure to get buffer */
+
+	DMAOUTREG(MACH64_Z_CNTL, 0);
+	DMAOUTREG(MACH64_SCALE_3D_CNTL, 0);
+
+	DMAOUTREG(MACH64_SC_LEFT_RIGHT, 0 | (8191 << 16));	/* no scissor */
+	DMAOUTREG(MACH64_SC_TOP_BOTTOM, 0 | (16383 << 16));
+
+	DMAOUTREG(MACH64_CLR_CMP_CNTL, 0);
+	DMAOUTREG(MACH64_GUI_TRAJ_CNTL, (MACH64_DST_X_LEFT_TO_RIGHT |
+					 MACH64_DST_Y_TOP_TO_BOTTOM));
+
+	DMAOUTREG(MACH64_DP_PIX_WIDTH, ((fb_bpp << 0) |
+					(fb_bpp << 4) |
+					(fb_bpp << 8) |
+					(fb_bpp << 16) | (fb_bpp << 28)));
+
+	DMAOUTREG(MACH64_DP_WRITE_MASK, 0xffffffff);
+	DMAOUTREG(MACH64_DP_MIX, (MACH64_BKGD_MIX_D | MACH64_FRGD_MIX_S));
+	DMAOUTREG(MACH64_DP_SRC, (MACH64_BKGD_SRC_BKGD_CLR |
+				  MACH64_FRGD_SRC_BLIT | MACH64_MONO_SRC_ONE));
+
+	DMAOUTREG(MACH64_SRC_OFF_PITCH, dev_priv->back_offset_pitch);
+	DMAOUTREG(MACH64_DST_OFF_PITCH, dev_priv->front_offset_pitch);
+	/* One blit per cliprect, source = back buffer, destination = front */
+	for (i = 0; i < nbox; i++) {
+		int x = pbox[i].x1;
+		int y = pbox[i].y1;
+		int w = pbox[i].x2 - x;
+		int h = pbox[i].y2 - y;
+
+		DRM_DEBUG("dispatch swap %d,%d-%d,%d\n",
+			  pbox[i].x1, pbox[i].y1, pbox[i].x2, pbox[i].y2);
+
+		DMAOUTREG(MACH64_SRC_WIDTH1, w);
+		DMAOUTREG(MACH64_SRC_Y_X, (x << 16) | y);
+		DMAOUTREG(MACH64_DST_Y_X, (x << 16) | y);
+		DMAOUTREG(MACH64_DST_WIDTH_HEIGHT, (h << 16) | w);
+
+	}
+
+	DMAADVANCE(dev_priv, 1);	/* queue the buffer with its discard flag set */
+	/* Async DMA only: shift the frame history down, append this frame */
+	if (dev_priv->driver_mode == MACH64_MODE_DMA_ASYNC) {
+		for (i = 0; i < MACH64_MAX_QUEUED_FRAMES - 1; i++) {
+			dev_priv->frame_ofs[i] = dev_priv->frame_ofs[i + 1];
+		}
+		dev_priv->frame_ofs[i] = GETRINGOFFSET();	/* i is the last slot here */
+
+		dev_priv->sarea_priv->frames_queued++;
+	}
+
+	return 0;
+}
+
+static int mach64_do_get_frames_queued(drm_mach64_private_t * dev_priv)
+{
+	drm_mach64_descriptor_ring_t *ring = &dev_priv->ring;
+	drm_mach64_sarea_t *sarea_priv = dev_priv->sarea_priv;
+	int i, start;
+	u32 head, tail, ofs;
+
+	DRM_DEBUG("\n");
+	/* Updates and returns the SAREA frames_queued count */
+	if (sarea_priv->frames_queued == 0)
+		return 0;
+	/* Sample the tail, then read the head after mach64_ring_tick() */
+	tail = ring->tail;
+	mach64_ring_tick(dev_priv, ring);
+	head = ring->head;
+
+	start = (MACH64_MAX_QUEUED_FRAMES -
+		 DRM_MIN(MACH64_MAX_QUEUED_FRAMES, sarea_priv->frames_queued));
+	/* head == tail (presumably an idle ring): drop every recorded frame */
+	if (head == tail) {
+		sarea_priv->frames_queued = 0;
+		for (i = start; i < MACH64_MAX_QUEUED_FRAMES; i++) {
+			dev_priv->frame_ofs[i] = ~0;
+		}
+		return 0;
+	}
+	/* Retire frames whose offset is unset or outside the head..tail window */
+	for (i = start; i < MACH64_MAX_QUEUED_FRAMES; i++) {
+		ofs = dev_priv->frame_ofs[i];
+		DRM_DEBUG("frame_ofs[%d] ofs: %d\n", i, ofs);
+		if (ofs == ~0 ||
+		    (head < tail && (ofs < head || ofs >= tail)) ||
+		    (head > tail && (ofs < head && ofs >= tail))) {	/* window wraps */
+			sarea_priv->frames_queued =
+			    (MACH64_MAX_QUEUED_FRAMES - 1) - i;	/* slots after i */
+			dev_priv->frame_ofs[i] = ~0;
+		}
+	}
+
+	return sarea_priv->frames_queued;
+}
+
+/* Copy and verify a client submitted buffer: every command header must
+ * name a Setup Engine register, otherwise an error is returned.
+ * FIXME: Make an assembly optimized version
+ */
+static __inline__ int copy_from_user_vertex(u32 *to,
+					    const u32 __user *ufrom,
+					    unsigned long bytes)
+{
+	unsigned long left = bytes >> 2;	/* dwords remaining in buffer */
+	u32 *staging, *src;
+	int rc;
+
+	staging = drm_alloc(bytes, DRM_MEM_DRIVER);
+	if (staging == NULL)
+		return -ENOMEM;
+
+	if (DRM_COPY_FROM_USER(staging, ufrom, bytes)) {
+		rc = -EFAULT;
+		goto out_free;
+	}
+
+	/* Walk the commands: one header dword followed by its payload. */
+	for (src = staging; left > 1; ) {
+		u32 header = *src++;
+		u32 reg = le32_to_cpu(header);
+		u32 count = (reg >> 16) + 1;	/* payload length in dwords */
+
+		left--;
+		if (count > left) {
+			DRM_ERROR
+			    ("Got bad command count(=%u) dwords remaining=%lu\n",
+			     count, left);
+			rc = -EINVAL;
+			goto out_free;
+		}
+		left -= count;
+		reg &= 0xffff;
+
+		/* This is an exact match of Mach64's Setup Engine registers,
+		 * excluding SETUP_CNTL (1_C1).
+		 */
+		if (!((reg >= 0x0190 && reg < 0x01c1) ||
+		      (reg >= 0x01ca && reg <= 0x01cf))) {
+			DRM_ERROR("Got bad command: 0x%04x\n", reg);
+			rc = -EACCES;
+			goto out_free;
+		}
+
+		/* Accepted: emit the header, then its payload, unchanged. */
+		*to++ = header;
+		memcpy(to, src, count << 2);
+		src += count;
+		to += count;
+	}
+
+	drm_free(staging, bytes, DRM_MEM_DRIVER);
+	if (left != 0) {
+		DRM_ERROR("Bad buf->used(=%lu)\n", bytes);
+		return -EINVAL;
+	}
+	return 0;
+
+out_free:
+	drm_free(staging, bytes, DRM_MEM_DRIVER);
+	return rc;
+}
+
+static int mach64_dma_dispatch_vertex(struct drm_device * dev,
+				      struct drm_file *file_priv,
+				      drm_mach64_vertex_t * vertex)
+{
+	drm_mach64_private_t *dev_priv = dev->dev_private;
+	drm_mach64_sarea_t *sarea_priv = dev_priv->sarea_priv;
+	struct drm_buf *copy_buf;
+	void *buf = vertex->buf;
+	unsigned long used = vertex->used;
+	int ret = 0;
+	int i = 0;
+	int done = 0;
+	int verify_ret = 0;
+	DMALOCALS;
+
+	/* Verify and copy the client data, then queue it once per cliprect */
+	DRM_DEBUG("buf=%p used=%lu nbox=%d\n", buf, used, sarea_priv->nbox);
+
+	if (!used)
+		goto _vertex_done;
+
+	copy_buf = mach64_freelist_get(dev_priv);
+	if (copy_buf == NULL) {
+		DRM_ERROR("couldn't get buffer\n");
+		return -EAGAIN;
+	}
+
+	/* Mach64's vertex data is actually register writes. To avoid security
+	 * compromises these register writes have to be verified and copied from
+	 * user space into a private DMA buffer.
+	 */
+	verify_ret = copy_from_user_vertex(GETBUFPTR(copy_buf), buf, used);
+
+	if (verify_ret != 0) {
+		mach64_freelist_put(dev_priv, copy_buf);
+		goto _vertex_done;
+	}
+
+	copy_buf->used = used;
+	DMASETPTR(copy_buf);
+
+	if (sarea_priv->dirty & ~MACH64_UPLOAD_CLIPRECTS) {
+		ret = mach64_emit_state(file_priv, dev_priv);
+		if (ret < 0)
+			goto _vertex_release;
+	}
+
+	do {
+		/* Emit the next cliprect */
+		if (i < sarea_priv->nbox) {
+			ret = mach64_emit_cliprect(file_priv, dev_priv,
+						   &sarea_priv->boxes[i]);
+			if (ret < 0)	/* failed to get buffer */
+				goto _vertex_release;
+			if (ret != 0)	/* null intersection with scissor */
+				continue;
+		}
+		if ((i >= sarea_priv->nbox - 1))
+			done = 1;
+
+		/* Add the buffer to the DMA queue */
+		DMAADVANCE(dev_priv, done);
+
+	} while (++i < sarea_priv->nbox);
+
+_vertex_release:
+	/* Reached on success and on emit failure: a buffer that was not queued
+	 * with its final discard flag is handed back so that it is not leaked.
+	 */
+	if (!done) {
+		if (copy_buf->pending) {
+			DMADISCARDBUF();
+		} else {
+			/* Never queued (e.g. no cliprects): back on the free list */
+			mach64_freelist_put(dev_priv, copy_buf);
+		}
+	}
+	if (ret < 0)
+		return ret;	/* as before, the SAREA is left untouched */
+
+_vertex_done:
+	sarea_priv->dirty &= ~MACH64_UPLOAD_CLIPRECTS;
+	sarea_priv->nbox = 0;
+
+	return verify_ret;
+}
+
+static __inline__ int copy_from_user_blit(u32 *to,
+					  const u32 __user *ufrom,
+					  unsigned long bytes)
+{
+	/* The data is stored MACH64_HOSTDATA_BLIT_OFFSET bytes into "to". */
+	char *dst = (char *)to + MACH64_HOSTDATA_BLIT_OFFSET;
+
+	if (DRM_COPY_FROM_USER(dst, ufrom, bytes))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int mach64_dma_dispatch_blit(struct drm_device * dev,
+				    struct drm_file *file_priv,
+				    drm_mach64_blit_t * blit)
+{
+	drm_mach64_private_t *dev_priv = dev->dev_private;
+	int dword_shift, dwords;
+	unsigned long used;
+	struct drm_buf *copy_buf;
+	int verify_ret = 0;
+	DMALOCALS;
+
+	/* The compiler won't optimize away a division by a variable,
+	 * even if the only legal values are powers of two.  Thus, we'll
+	 * use a shift instead.
+	 */
+	switch (blit->format) {
+	case MACH64_DATATYPE_ARGB8888:
+		dword_shift = 0;	/* one pixel per dword */
+		break;
+	case MACH64_DATATYPE_ARGB1555:
+	case MACH64_DATATYPE_RGB565:
+	case MACH64_DATATYPE_VYUY422:
+	case MACH64_DATATYPE_YVYU422:
+	case MACH64_DATATYPE_ARGB4444:
+		dword_shift = 1;	/* two pixels per dword */
+		break;
+	case MACH64_DATATYPE_CI8:
+	case MACH64_DATATYPE_RGB8:
+		dword_shift = 2;	/* four pixels per dword */
+		break;
+	default:
+		DRM_ERROR("invalid blit format %d\n", blit->format);
+		return -EINVAL;
+	}
+
+	/* Set buf->used to the bytes of blit data based on the blit dimensions
+	 * and verify the size.  When the setup is emitted to the buffer with
+	 * the DMA* macros below, buf->used is incremented to include the bytes
+	 * used for setup as well as the blit data.
+	 */
+	dwords = (blit->width * blit->height) >> dword_shift;
+	used = dwords << 2;
+	if (used <= 0 ||	/* used is unsigned, so this only matches zero */
+	    used > MACH64_BUFFER_SIZE - MACH64_HOSTDATA_BLIT_OFFSET) {
+		DRM_ERROR("Invalid blit size: %lu bytes\n", used);
+		return -EINVAL;
+	}
+
+	copy_buf = mach64_freelist_get(dev_priv);
+	if (copy_buf == NULL) {
+		DRM_ERROR("couldn't get buffer\n");
+		return -EAGAIN;
+	}
+
+	/* Copy the blit data from userspace.
+	 *
+	 * XXX: This is overkill. The most efficient solution would be having
+	 * two sets of buffers (one set private for vertex data, the other set
+	 * client-writable for blits). However that would bring more complexity
+	 * and would break backward compatibility. The solution currently
+	 * implemented is keeping all buffers private, allowing to secure the
+	 * driver, without increasing complexity at the expense of some speed
+	 * transferring data.
+	 */
+	verify_ret = copy_from_user_blit(GETBUFPTR(copy_buf), blit->buf, used);
+
+	if (verify_ret != 0) {
+		mach64_freelist_put(dev_priv, copy_buf);	/* hand the unused buffer back */
+		goto _blit_done;
+	}
+
+	copy_buf->used = used;
+
+	/* FIXME: Use a last buffer flag and reduce the state emitted for subsequent,
+	 * continuation buffers?
+	 */
+
+	/* Blit via BM_HOSTDATA (gui-master) - like HOST_DATA[0-15], but doesn't require
+	 * a register command every 16 dwords.  State setup is added at the start of the
+	 * buffer -- the client leaves space for this based on MACH64_HOSTDATA_BLIT_OFFSET
+	 */
+	DMASETPTR(copy_buf);
+
+	DMAOUTREG(MACH64_Z_CNTL, 0);
+	DMAOUTREG(MACH64_SCALE_3D_CNTL, 0);
+
+	DMAOUTREG(MACH64_SC_LEFT_RIGHT, 0 | (8191 << 16));	/* no scissor */
+	DMAOUTREG(MACH64_SC_TOP_BOTTOM, 0 | (16383 << 16));
+
+	DMAOUTREG(MACH64_CLR_CMP_CNTL, 0);	/* disable */
+	DMAOUTREG(MACH64_GUI_TRAJ_CNTL,
+		  MACH64_DST_X_LEFT_TO_RIGHT | MACH64_DST_Y_TOP_TO_BOTTOM);
+
+	DMAOUTREG(MACH64_DP_PIX_WIDTH, (blit->format << 0)	/* dst pix width */
+		  |(blit->format << 4)	/* composite pix width */
+		  |(blit->format << 8)	/* src pix width */
+		  |(blit->format << 16)	/* host data pix width */
+		  |(blit->format << 28)	/* scaler/3D pix width */
+	    );
+
+	DMAOUTREG(MACH64_DP_WRITE_MASK, 0xffffffff);	/* enable all planes */
+	DMAOUTREG(MACH64_DP_MIX, MACH64_BKGD_MIX_D | MACH64_FRGD_MIX_S);
+	DMAOUTREG(MACH64_DP_SRC,
+		  MACH64_BKGD_SRC_BKGD_CLR
+		  | MACH64_FRGD_SRC_HOST | MACH64_MONO_SRC_ONE);
+
+	DMAOUTREG(MACH64_DST_OFF_PITCH,
+		  (blit->pitch << 22) | (blit->offset >> 3));
+	DMAOUTREG(MACH64_DST_X_Y, (blit->y << 16) | blit->x);
+	DMAOUTREG(MACH64_DST_WIDTH_HEIGHT, (blit->height << 16) | blit->width);
+
+	DRM_DEBUG("%lu bytes\n", used);
+
+	/* Add the buffer to the queue; the macro returns from here on failure */
+	DMAADVANCEHOSTDATA(dev_priv);
+
+_blit_done:
+	return verify_ret;
+}
+
+/* ================================================================
+ * IOCTL functions
+ */
+
+int mach64_dma_clear(struct drm_device *dev, void *data,
+		     struct drm_file *file_priv)
+{
+	drm_mach64_private_t *dev_priv = dev->dev_private;
+	drm_mach64_sarea_t *sarea_priv;
+	drm_mach64_clear_t *clear = data;
+	int ret;
+
+	DRM_DEBUG("pid=%d\n", DRM_CURRENTPID);
+	LOCK_TEST_WITH_RETURN(dev, file_priv);
+	if (!dev_priv) {	/* tested before dev_priv is first dereferenced */
+		DRM_ERROR("called with no initialization\n");
+		return -EINVAL;
+	}
+	sarea_priv = dev_priv->sarea_priv;
+	if (sarea_priv->nbox > MACH64_NR_SAREA_CLIPRECTS)
+		sarea_priv->nbox = MACH64_NR_SAREA_CLIPRECTS;
+
+	ret = mach64_dma_dispatch_clear(dev, file_priv, clear->flags,
+					clear->x, clear->y, clear->w, clear->h,
+					clear->clear_color, clear->clear_depth);
+	/* Make sure we restore the 3D state next time. */
+	sarea_priv->dirty |= (MACH64_UPLOAD_CONTEXT | MACH64_UPLOAD_MISC);
+	return ret;
+}
+
+int mach64_dma_swap(struct drm_device *dev, void *data,
+		    struct drm_file *file_priv)
+{
+	drm_mach64_private_t *dev_priv = dev->dev_private;
+	drm_mach64_sarea_t *sarea_priv;
+	int ret;
+
+	DRM_DEBUG("pid=%d\n", DRM_CURRENTPID);
+	LOCK_TEST_WITH_RETURN(dev, file_priv);
+	if (!dev_priv) {	/* tested before dev_priv is first dereferenced */
+		DRM_ERROR("called with no initialization\n");
+		return -EINVAL;
+	}
+	sarea_priv = dev_priv->sarea_priv;
+	if (sarea_priv->nbox > MACH64_NR_SAREA_CLIPRECTS)
+		sarea_priv->nbox = MACH64_NR_SAREA_CLIPRECTS;
+	ret = mach64_dma_dispatch_swap(dev, file_priv);
+	/* Make sure we restore the 3D state next time. */
+	sarea_priv->dirty |= (MACH64_UPLOAD_CONTEXT | MACH64_UPLOAD_MISC);
+	return ret;
+}
+
+int mach64_dma_vertex(struct drm_device *dev, void *data,
+		      struct drm_file *file_priv)
+{
+	drm_mach64_private_t *dev_priv = dev->dev_private;
+	drm_mach64_sarea_t *sarea_priv;
+	drm_mach64_vertex_t *vertex = data;
+
+	LOCK_TEST_WITH_RETURN(dev, file_priv);
+
+	if (!dev_priv) {	/* must precede every dev_priv dereference */
+		DRM_ERROR("called with no initialization\n");
+		return -EINVAL;
+	}
+	sarea_priv = dev_priv->sarea_priv;
+
+	DRM_DEBUG("pid=%d buf=%p used=%lu discard=%d\n",
+		  DRM_CURRENTPID, vertex->buf, vertex->used, vertex->discard);
+
+	if (vertex->prim < 0 || vertex->prim > MACH64_PRIM_POLYGON) {
+		DRM_ERROR("buffer prim %d\n", vertex->prim);
+		return -EINVAL;
+	}
+
+	if (vertex->used > MACH64_BUFFER_SIZE || (vertex->used & 3) != 0) {
+		DRM_ERROR("Invalid vertex buffer size: %lu bytes\n",
+			  vertex->used);
+		return -EINVAL;
+	}
+
+	if (sarea_priv->nbox > MACH64_NR_SAREA_CLIPRECTS)
+		sarea_priv->nbox = MACH64_NR_SAREA_CLIPRECTS;
+
+	return mach64_dma_dispatch_vertex(dev, file_priv, vertex);
+}
+
+int mach64_dma_blit(struct drm_device *dev, void *data,
+		    struct drm_file *file_priv)
+{
+	drm_mach64_private_t *dev_priv = dev->dev_private;
+	drm_mach64_blit_t *blit = data;
+	int ret;
+
+	LOCK_TEST_WITH_RETURN(dev, file_priv);
+	if (!dev_priv) {	/* tested before dev_priv is first dereferenced */
+		DRM_ERROR("called with no initialization\n");
+		return -EINVAL;
+	}
+
+	ret = mach64_dma_dispatch_blit(dev, file_priv, blit);
+	/* Make sure we restore the 3D state next time. */
+	dev_priv->sarea_priv->dirty |= (MACH64_UPLOAD_CONTEXT |
+		MACH64_UPLOAD_MISC | MACH64_UPLOAD_CLIPRECTS);
+	return ret;
+}
+
+int mach64_get_param(struct drm_device *dev, void *data,
+		     struct drm_file *file_priv)
+{
+	drm_mach64_getparam_t *request = data;
+	drm_mach64_private_t *dev_priv = dev->dev_private;
+	int result;
+
+	DRM_DEBUG("\n");
+
+	/* Every parameter is refused while the device has no private
+	 * data attached.
+	 */
+	if (dev_priv == NULL) {
+		DRM_ERROR("called with no initialization\n");
+		return -EINVAL;
+	}
+
+	if (request->param == MACH64_PARAM_FRAMES_QUEUED) {
+		/* Needs lock since it calls mach64_ring_tick() */
+		LOCK_TEST_WITH_RETURN(dev, file_priv);
+		result = mach64_do_get_frames_queued(dev_priv);
+	} else if (request->param == MACH64_PARAM_IRQ_NR) {
+		result = dev->irq;
+	} else {
+		return -EINVAL;	/* unknown parameter */
+	}
+
+	/* request->value is the destination handed to DRM_COPY_TO_USER. */
+	if (DRM_COPY_TO_USER(request->value, &result, sizeof(int))) {
+		DRM_ERROR("copy_to_user\n");
+		return -EFAULT;
+	}
+
+	return 0;
+}
--- linux/drivers/gpu/drm/Kconfig.gpu-drm-mach64.orig
+++ linux/drivers/gpu/drm/Kconfig
@@ -267,6 +267,17 @@ config DRM_I810
 	  selected, the module will be called i810.  AGP support is required
 	  for this driver to work.
 
+config DRM_MACH64
+	tristate "ATI Rage Pro (Mach64)"
+	depends on DRM && PCI
+	help
+	  Choose this option if you have an ATI Rage Pro (mach64 chipset)
+	  graphics card.  Example cards include:  3D Rage Pro, Xpert 98,
+	  3D Rage LT Pro, 3D Rage XL/XC, and 3D Rage Mobility (P/M, M1).
+	  Cards earlier than ATI Rage Pro (e.g. Rage II) are not supported.
+	  If M is selected, the module will be called mach64.  AGP support for
+	  this card is strongly suggested (unless you have a PCI version).
+
 config DRM_MGA
 	tristate "Matrox g200/g400"
 	depends on DRM && PCI
--- linux/drivers/gpu/drm/Makefile.gpu-drm-mach64.orig
+++ linux/drivers/gpu/drm/Makefile
@@ -47,6 +47,7 @@ obj-$(CONFIG_DRM_R128)	+= r128/
 obj-$(CONFIG_HSA_AMD) += amd/amdkfd/
 obj-$(CONFIG_DRM_RADEON)+= radeon/
 obj-$(CONFIG_DRM_AMDGPU)+= amd/amdgpu/
+obj-$(CONFIG_DRM_MACH64)+= mach64/
 obj-$(CONFIG_DRM_MGA)	+= mga/
 obj-$(CONFIG_DRM_I810)	+= i810/
 obj-$(CONFIG_DRM_I915)	+= i915/
